diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll index 424388a30e99b..d1a303b41deef 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s @@ -24,12 +24,12 @@ define float @local_atomic_fmax_ret_f32(ptr addrspace(3) %ptr, float %val) { ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmax_ret_f32: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_max_rtn_f32 v0, v0, v1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmax_ret_f32: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_max_rtn_f32 v0, v0, v1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmax_ret_f32: ; GFX11: ; %bb.0: @@ -96,12 +96,12 @@ define void @local_atomic_fmax_noret_f32(ptr addrspace(3) %ptr, float %val) { ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmax_noret_f32: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_max_f32 v0, v1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmax_noret_f32: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_max_f32 v0, v1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmax_noret_f32: ; GFX11: ; %bb.0: @@ -168,14 +168,14 @@ define double @local_atomic_fmax_ret_f64(ptr addrspace(3) %ptr, double %val) { ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmax_ret_f64: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: ds_max_rtn_f64 v[0:1], v0, v[4:5] -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmax_ret_f64: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: ds_max_rtn_f64 v[0:1], v0, v[4:5] +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmax_ret_f64: ; GFX11: ; %bb.0: @@ -244,14 +244,14 @@ define void @local_atomic_fmax_noret_f64(ptr addrspace(3) %ptr, double %val) { ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmax_noret_f64: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: ds_max_f64 v0, v[4:5] -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmax_noret_f64: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: ds_max_f64 v0, v[4:5] +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmax_noret_f64: ; GFX11: ; %bb.0: @@ -320,30 +320,30 @@ define float @global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory(pt ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB4_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_max_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB4_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB4_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_max_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB4_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -466,29 +466,29 @@ define void @global_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory(p ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v4, v2, v2 -; GFX940-NEXT: .LBB5_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB5_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v4, v2, v2 +; GFX942-NEXT: .LBB5_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB5_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -626,14 +626,14 @@ define double @global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory(p ; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -781,14 +781,14 @@ define void @global_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory(p ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_max_f64 v[0:1], v[2:3], off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_max_f64 v[0:1], v[2:3], off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -911,30 +911,30 @@ define float @flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory(ptr ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB8_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_max_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB8_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB8_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_max_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB8_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -1053,29 +1053,29 @@ define void @flat_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory(ptr ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v4, v2, v2 -; GFX940-NEXT: .LBB9_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB9_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v4, v2, v2 +; GFX942-NEXT: .LBB9_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB9_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -1212,14 +1212,14 @@ define double @flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory(ptr ; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_max_f64 v[0:1], v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_max_f64 v[0:1], v[0:1], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -1365,14 +1365,14 @@ define void @flat_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory(ptr ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_max_f64 v[0:1], v[2:3] -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_max_f64 v[0:1], v[2:3] +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -1497,32 +1497,32 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, s16 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: buffer_load_dword v0, v2, s[0:3], 0 offen -; GFX940-NEXT: s_mov_b64 s[4:5], 0 -; GFX940-NEXT: v_max_f32_e32 v3, v1, v1 -; GFX940-NEXT: .LBB12_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_max_f32_e32 v0, v5, v5 -; GFX940-NEXT: v_max_f32_e32 v4, v0, v3 -; GFX940-NEXT: v_mov_b64_e32 v[0:1], v[4:5] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_cmpswap v[0:1], v2, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 -; GFX940-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_cbranch_execnz .LBB12_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, s16 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: buffer_load_dword v0, v2, s[0:3], 0 offen +; GFX942-NEXT: s_mov_b64 s[4:5], 0 +; GFX942-NEXT: v_max_f32_e32 v3, v1, v1 +; GFX942-NEXT: .LBB12_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_max_f32_e32 v0, v5, v5 +; GFX942-NEXT: v_max_f32_e32 v4, v0, v3 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_cmpswap v[0:1], v2, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 +; GFX942-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_cbranch_execnz .LBB12_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -1653,31 +1653,31 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_ ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, s16 -; GFX940-NEXT: buffer_load_dword v1, v2, s[0:3], 0 offen -; GFX940-NEXT: s_mov_b64 s[4:5], 0 -; GFX940-NEXT: v_max_f32_e32 v3, v0, v0 -; GFX940-NEXT: .LBB13_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v0, v1, v1 -; GFX940-NEXT: v_max_f32_e32 v0, v0, v3 -; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[0:1] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1 -; GFX940-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_cbranch_execnz .LBB13_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, s16 +; GFX942-NEXT: buffer_load_dword v1, v2, s[0:3], 0 offen +; GFX942-NEXT: s_mov_b64 s[4:5], 0 +; GFX942-NEXT: v_max_f32_e32 v3, v0, v0 +; GFX942-NEXT: .LBB13_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v0, v1, v1 +; GFX942-NEXT: v_max_f32_e32 v0, v0, v3 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1 +; GFX942-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_cbranch_execnz .LBB13_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -1826,15 +1826,15 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_ ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, s16 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, s16 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -1996,15 +1996,15 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_ ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, s16 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, s16 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmin.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmin.ll index b52a39f1a55c8..b8538cbf254fc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmin.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmin.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s @@ -24,12 +24,12 @@ define float @local_atomic_fmin_ret_f32(ptr addrspace(3) %ptr, float %val) { ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmin_ret_f32: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_min_rtn_f32 v0, v0, v1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmin_ret_f32: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_min_rtn_f32 v0, v0, v1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmin_ret_f32: ; GFX11: ; %bb.0: @@ -96,12 +96,12 @@ define void @local_atomic_fmin_noret_f32(ptr addrspace(3) %ptr, float %val) { ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmin_noret_f32: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_min_f32 v0, v1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmin_noret_f32: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_min_f32 v0, v1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmin_noret_f32: ; GFX11: ; %bb.0: @@ -168,14 +168,14 @@ define double @local_atomic_fmin_ret_f64(ptr addrspace(3) %ptr, double %val) { ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmin_ret_f64: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: ds_min_rtn_f64 v[0:1], v0, v[4:5] -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmin_ret_f64: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: ds_min_rtn_f64 v[0:1], v0, v[4:5] +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmin_ret_f64: ; GFX11: ; %bb.0: @@ -244,14 +244,14 @@ define void @local_atomic_fmin_noret_f64(ptr addrspace(3) %ptr, double %val) { ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmin_noret_f64: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: ds_min_f64 v0, v[4:5] -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmin_noret_f64: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: ds_min_f64 v0, v[4:5] +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmin_noret_f64: ; GFX11: ; %bb.0: @@ -320,30 +320,30 @@ define float @global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory(pt ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB4_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_min_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB4_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB4_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_min_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB4_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -466,29 +466,29 @@ define void @global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory(p ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v4, v2, v2 -; GFX940-NEXT: .LBB5_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX940-NEXT: v_min_f32_e32 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB5_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v4, v2, v2 +; GFX942-NEXT: .LBB5_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX942-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB5_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -626,14 +626,14 @@ define double @global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory(p ; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_min_f64 v[0:1], v[0:1], v[2:3], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_min_f64 v[0:1], v[0:1], v[2:3], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -781,14 +781,14 @@ define void @global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory(p ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_min_f64 v[0:1], v[2:3], off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_min_f64 v[0:1], v[2:3], off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -911,30 +911,30 @@ define float @flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory(ptr ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB8_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_min_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB8_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB8_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_min_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB8_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -1053,29 +1053,29 @@ define void @flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory(ptr ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v4, v2, v2 -; GFX940-NEXT: .LBB9_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX940-NEXT: v_min_f32_e32 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB9_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v4, v2, v2 +; GFX942-NEXT: .LBB9_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX942-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB9_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -1212,14 +1212,14 @@ define double @flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory(ptr ; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -1365,14 +1365,14 @@ define void @flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory(ptr ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_min_f64 v[0:1], v[2:3] -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_min_f64 v[0:1], v[2:3] +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -1497,32 +1497,32 @@ define float @buffer_fat_ptr_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_m ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, s16 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: buffer_load_dword v0, v2, s[0:3], 0 offen -; GFX940-NEXT: s_mov_b64 s[4:5], 0 -; GFX940-NEXT: v_max_f32_e32 v3, v1, v1 -; GFX940-NEXT: .LBB12_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_max_f32_e32 v0, v5, v5 -; GFX940-NEXT: v_min_f32_e32 v4, v0, v3 -; GFX940-NEXT: v_mov_b64_e32 v[0:1], v[4:5] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_cmpswap v[0:1], v2, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 -; GFX940-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_cbranch_execnz .LBB12_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, s16 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: buffer_load_dword v0, v2, s[0:3], 0 offen +; GFX942-NEXT: s_mov_b64 s[4:5], 0 +; GFX942-NEXT: v_max_f32_e32 v3, v1, v1 +; GFX942-NEXT: .LBB12_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_max_f32_e32 v0, v5, v5 +; GFX942-NEXT: v_min_f32_e32 v4, v0, v3 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_cmpswap v[0:1], v2, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 +; GFX942-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_cbranch_execnz .LBB12_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -1653,31 +1653,31 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_ ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, s16 -; GFX940-NEXT: buffer_load_dword v1, v2, s[0:3], 0 offen -; GFX940-NEXT: s_mov_b64 s[4:5], 0 -; GFX940-NEXT: v_max_f32_e32 v3, v0, v0 -; GFX940-NEXT: .LBB13_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v0, v1, v1 -; GFX940-NEXT: v_min_f32_e32 v0, v0, v3 -; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[0:1] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1 -; GFX940-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_cbranch_execnz .LBB13_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, s16 +; GFX942-NEXT: buffer_load_dword v1, v2, s[0:3], 0 offen +; GFX942-NEXT: s_mov_b64 s[4:5], 0 +; GFX942-NEXT: v_max_f32_e32 v3, v0, v0 +; GFX942-NEXT: .LBB13_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v0, v1, v1 +; GFX942-NEXT: v_min_f32_e32 v0, v0, v3 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1 +; GFX942-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_cbranch_execnz .LBB13_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -1826,15 +1826,15 @@ define double @buffer_fat_ptr_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_ ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, s16 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, s16 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -1996,15 +1996,15 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_ ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, s16 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, s16 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-no-rtn.ll index 0816eae28f614..714328a42d675 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-no-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-no-rtn.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX908_GFX11 %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX908_GFX11 %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX12 %s @@ -20,19 +20,19 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_offset_no_rtn(float %val, <4 x i32 ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_offset_no_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f32_offset_no_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: buffer_atomic_fadd_f32_offset_no_rtn ; GFX12: bb.1 (%ir-block.0): @@ -67,20 +67,20 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_offen_no_rtn(float %val, <4 x i32> ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_offen_no_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f32_offen_no_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: buffer_atomic_fadd_f32_offen_no_rtn ; GFX12: bb.1 (%ir-block.0): @@ -116,20 +116,20 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_idxen_no_rtn(float %val, <4 x i32> ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_idxen_no_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f32_idxen_no_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: buffer_atomic_fadd_f32_idxen_no_rtn ; GFX12: bb.1 (%ir-block.0): @@ -167,22 +167,22 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_bothen_no_rtn(float %val, <4 x i32 ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_bothen_no_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f32_bothen_no_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: buffer_atomic_fadd_f32_bothen_no_rtn ; GFX12: bb.1 (%ir-block.0): @@ -219,19 +219,19 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_offset_no_rtn(float %val, ptr ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_no_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_no_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_no_rtn ; GFX12: bb.1 (%ir-block.0): @@ -266,20 +266,20 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_offen_no_rtn(float %val, ptr a ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_no_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_no_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_no_rtn ; GFX12: bb.1 (%ir-block.0): @@ -315,20 +315,20 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_idxen_no_rtn(float %val, ptr a ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_no_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_no_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_no_rtn ; GFX12: bb.1 (%ir-block.0): @@ -366,22 +366,22 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_bothen_no_rtn(float %val, ptr ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_no_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_no_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_no_rtn ; GFX12: bb.1 (%ir-block.0): diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-rtn.ll index c0b84c914ce5c..fb95d99e9f65b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-rtn.ll @@ -1,24 +1,24 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX12 %s define amdgpu_ps float @buffer_atomic_fadd_f32_offset_rtn(float %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_offset_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f32_offset_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX11-LABEL: name: buffer_atomic_fadd_f32_offset_rtn ; GFX11: bb.1 (%ir-block.0): @@ -54,21 +54,21 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_offset_rtn(float %val, <4 x i32> } define amdgpu_ps float @buffer_atomic_fadd_f32_offen_rtn(float %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_offen_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f32_offen_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX11-LABEL: name: buffer_atomic_fadd_f32_offen_rtn ; GFX11: bb.1 (%ir-block.0): @@ -106,21 +106,21 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_offen_rtn(float %val, <4 x i32> i } define amdgpu_ps float @buffer_atomic_fadd_f32_idxen_rtn(float %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_idxen_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f32_idxen_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX11-LABEL: name: buffer_atomic_fadd_f32_idxen_rtn ; GFX11: bb.1 (%ir-block.0): @@ -158,23 +158,23 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_idxen_rtn(float %val, <4 x i32> i } define amdgpu_ps float @buffer_atomic_fadd_f32_bothen_rtn(float %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_bothen_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f32_bothen_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX11-LABEL: name: buffer_atomic_fadd_f32_bothen_rtn ; GFX11: bb.1 (%ir-block.0): @@ -216,20 +216,20 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_bothen_rtn(float %val, <4 x i32> } define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_offset_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_rtn ; GFX11: bb.1 (%ir-block.0): @@ -265,21 +265,21 @@ define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_offset_rtn(float %val, ptr ad } define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_offen_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_rtn ; GFX11: bb.1 (%ir-block.0): @@ -317,21 +317,21 @@ define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_offen_rtn(float %val, ptr add } define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_idxen_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_rtn ; GFX11: bb.1 (%ir-block.0): @@ -369,23 +369,23 @@ define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_idxen_rtn(float %val, ptr add } define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_bothen_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_rtn ; GFX11: bb.1 (%ir-block.0): diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f64.ll index 7c4069b4b3138..f71f573e5a799 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f64.ll @@ -1,391 +1,391 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s define amdgpu_ps void @buffer_atomic_fadd_f64_offset_no_rtn(double %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_offset_no_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_OFFSET [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f64_offset_no_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_OFFSET [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret void } define amdgpu_ps void @buffer_atomic_fadd_f64_offen_no_rtn(double %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_offen_no_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_OFFEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f64_offen_no_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_OFFEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void } define amdgpu_ps void @buffer_atomic_fadd_f64_idxen_no_rtn(double %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_idxen_no_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_IDXEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f64_idxen_no_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_IDXEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void } define amdgpu_ps void @buffer_atomic_fadd_f64_bothen_no_rtn(double %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_bothen_no_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_BOTHEN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f64_bothen_no_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_BOTHEN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret void } define amdgpu_ps double @buffer_atomic_fadd_f64_offset_rtn(double %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_offset_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1 - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f64_offset_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0 + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1 + ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret double %ret } define amdgpu_ps double @buffer_atomic_fadd_f64_offen_rtn(double %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_offen_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1 - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f64_offen_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0 + ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1 + ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret double %ret } define amdgpu_ps double @buffer_atomic_fadd_f64_idxen_rtn(double %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_idxen_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1 - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f64_idxen_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0 + ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1 + ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret double %ret } define amdgpu_ps double @buffer_atomic_fadd_f64_bothen_rtn(double %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_bothen_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1 - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f64_bothen_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0 + ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1 + ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret double %ret } define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_offset_no_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_no_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_OFFSET [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_no_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_OFFSET [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0) ret void } define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_offen_no_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_no_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_OFFEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_no_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_OFFEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void } define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_idxen_no_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_no_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_IDXEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_no_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_IDXEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void } define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_bothen_no_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_no_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_BOTHEN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_no_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_BOTHEN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret void } define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_offset_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1 - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0 + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1 + ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0) ret double %ret } define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_offen_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1 - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0 + ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1 + ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret double %ret } define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_idxen_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1 - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0 + ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1 + ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret double %ret } define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_bothen_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1 - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0 + ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1 + ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret double %ret } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll index 9514bea86e4d1..3ef735ddb7635 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX908 %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s define amdgpu_ps void @buffer_atomic_fadd_v2f16_offset_no_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) { ; GFX908-LABEL: name: buffer_atomic_fadd_v2f16_offset_no_rtn @@ -18,19 +18,19 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_offset_no_rtn(<2 x half> %val, < ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_offset_no_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_offset_no_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) ret void } @@ -51,20 +51,20 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_offen_no_rtn(<2 x half> %val, <4 ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_offen_no_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_offen_no_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void } @@ -85,20 +85,20 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_idxen_no_rtn(<2 x half> %val, <4 ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_idxen_no_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_idxen_no_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void } @@ -121,22 +121,22 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_bothen_no_rtn(<2 x half> %val, < ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_bothen_no_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_bothen_no_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret void } @@ -156,19 +156,19 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_offset_no_rtn(<2 x half> %va ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_no_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_no_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 4095, i32 %soffset, i32 0) ret void } @@ -189,20 +189,20 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_offen_no_rtn(<2 x half> %val ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_no_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_no_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void } @@ -223,20 +223,20 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_idxen_no_rtn(<2 x half> %val ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_no_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_no_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void } @@ -259,22 +259,22 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_bothen_no_rtn(<2 x half> %va ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_no_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_no_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-rtn.ll index 632ad55fdf892..756f287b77988 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-rtn.ll @@ -1,165 +1,165 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_offset_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_offset_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_offset_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret <2 x half> %ret } define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_offen_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_offen_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_offen_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret <2 x half> %ret } define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_idxen_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_idxen_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_idxen_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret <2 x half> %ret } define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_bothen_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_bothen_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 3, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_bothen_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 3, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret <2 x half> %ret } define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_offset_rtn(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0) ret <2 x half> %ret } define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_offen_rtn(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret <2 x half> %ret } define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_idxen_rtn(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret <2 x half> %ret } define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_bothen_rtn(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 3, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 3, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret <2 x half> %ret } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll index 7a97ac8211f67..340e293cda7b5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll @@ -1,19 +1,19 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX940 %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX942 %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_intrinsic(ptr %ptr, float %data) { - ; GFX940-LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic - ; GFX940: bb.1 (%ir-block.0): - ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr) - ; GFX940-NEXT: S_ENDPGM 0 + ; GFX942-LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic + ; GFX942: bb.1 (%ir-block.0): + ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX942-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr) + ; GFX942-NEXT: S_ENDPGM 0 ; ; GFX11-LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic ; GFX11: bb.1 (%ir-block.0): @@ -30,17 +30,17 @@ define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_intrinsic(ptr %ptr, float %da } define amdgpu_ps float @flat_atomic_fadd_f32_rtn_intrinsic(ptr %ptr, float %data) { - ; GFX940-LABEL: name: flat_atomic_fadd_f32_rtn_intrinsic - ; GFX940: bb.1 (%ir-block.0): - ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr) - ; GFX940-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]] - ; GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX942-LABEL: name: flat_atomic_fadd_f32_rtn_intrinsic + ; GFX942: bb.1 (%ir-block.0): + ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX942-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr) + ; GFX942-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]] + ; GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX11-LABEL: name: flat_atomic_fadd_f32_rtn_intrinsic ; GFX11: bb.1 (%ir-block.0): @@ -58,16 +58,16 @@ define amdgpu_ps float @flat_atomic_fadd_f32_rtn_intrinsic(ptr %ptr, float %data } define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_atomicrmw(ptr %ptr, float %data) { - ; GFX940-LABEL: name: flat_atomic_fadd_f32_no_rtn_atomicrmw - ; GFX940: bb.1 (%ir-block.0): - ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) - ; GFX940-NEXT: S_ENDPGM 0 + ; GFX942-LABEL: name: flat_atomic_fadd_f32_no_rtn_atomicrmw + ; GFX942: bb.1 (%ir-block.0): + ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX942-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) + ; GFX942-NEXT: S_ENDPGM 0 ; ; GFX11-LABEL: name: flat_atomic_fadd_f32_no_rtn_atomicrmw ; GFX11: bb.1 (%ir-block.0): @@ -84,17 +84,17 @@ define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_atomicrmw(ptr %ptr, float %da } define amdgpu_ps float @flat_atomic_fadd_f32_rtn_atomicrmw(ptr %ptr, float %data) { - ; GFX940-LABEL: name: flat_atomic_fadd_f32_rtn_atomicrmw - ; GFX940: bb.1 (%ir-block.0): - ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) - ; GFX940-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]] - ; GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX942-LABEL: name: flat_atomic_fadd_f32_rtn_atomicrmw + ; GFX942: bb.1 (%ir-block.0): + ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX942-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) + ; GFX942-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]] + ; GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX11-LABEL: name: flat_atomic_fadd_f32_rtn_atomicrmw ; GFX11: bb.1 (%ir-block.0): diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll index c9ab351f94016..a3562a18631d8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll @@ -1,43 +1,43 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_atomicrmw(ptr %ptr, double %data) { - ; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_no_rtn_atomicrmw - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: FLAT_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: flat_atomic_fadd_f64_no_rtn_atomicrmw + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: FLAT_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr %ptr, double %data syncscope("wavefront") monotonic, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 ret void } define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw(ptr %ptr, double %data) { - ; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_rtn_atomicrmw - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr) - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1 - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + ; GFX90A_GFX942-LABEL: name: flat_atomic_fadd_f64_rtn_atomicrmw + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr) + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1 + ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %ret = atomicrmw fadd ptr %ptr, double %data syncscope("wavefront") monotonic, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 ret double %ret } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll index 0896e4dc7af14..5909fe3d3694a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll @@ -1,66 +1,66 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX940 %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX942 %s define amdgpu_ps <2 x half> @flat_atomic_fadd_v2f16_rtn(ptr %ptr, <2 x half> %data) { - ; GFX940-LABEL: name: flat_atomic_fadd_v2f16_rtn - ; GFX940: bb.1 (%ir-block.0): - ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: [[FLAT_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_PK_ADD_F16_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr) - ; GFX940-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_PK_ADD_F16_RTN]] - ; GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX942-LABEL: name: flat_atomic_fadd_v2f16_rtn + ; GFX942: bb.1 (%ir-block.0): + ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX942-NEXT: [[FLAT_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_PK_ADD_F16_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr) + ; GFX942-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_PK_ADD_F16_RTN]] + ; GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = atomicrmw fadd ptr %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 ret <2 x half> %ret } define amdgpu_ps <2 x half> @flat_atomic_fadd_v2f16_saddr_rtn(ptr inreg %ptr, <2 x half> %data) { - ; GFX940-LABEL: name: flat_atomic_fadd_v2f16_saddr_rtn - ; GFX940: bb.1 (%ir-block.0): - ; GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX940-NEXT: [[FLAT_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_PK_ADD_F16_RTN [[COPY3]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr) - ; GFX940-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_PK_ADD_F16_RTN]] - ; GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX942-LABEL: name: flat_atomic_fadd_v2f16_saddr_rtn + ; GFX942: bb.1 (%ir-block.0): + ; GFX942-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] + ; GFX942-NEXT: [[FLAT_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_PK_ADD_F16_RTN [[COPY3]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr) + ; GFX942-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_PK_ADD_F16_RTN]] + ; GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = atomicrmw fadd ptr %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 ret <2 x half> %ret } define amdgpu_ps void @flat_atomic_fadd_v2f16_no_rtn(ptr %ptr, <2 x half> %data) { - ; GFX940-LABEL: name: flat_atomic_fadd_v2f16_no_rtn - ; GFX940: bb.1 (%ir-block.0): - ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: FLAT_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr) - ; GFX940-NEXT: S_ENDPGM 0 + ; GFX942-LABEL: name: flat_atomic_fadd_v2f16_no_rtn + ; GFX942: bb.1 (%ir-block.0): + ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX942-NEXT: FLAT_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr) + ; GFX942-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 ret void } define amdgpu_ps void @flat_atomic_fadd_v2f16_saddr_no_rtn(ptr inreg %ptr, <2 x half> %data) { - ; GFX940-LABEL: name: flat_atomic_fadd_v2f16_saddr_no_rtn - ; GFX940: bb.1 (%ir-block.0): - ; GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX940-NEXT: FLAT_ATOMIC_PK_ADD_F16 [[COPY3]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr) - ; GFX940-NEXT: S_ENDPGM 0 + ; GFX942-LABEL: name: flat_atomic_fadd_v2f16_saddr_no_rtn + ; GFX942: bb.1 (%ir-block.0): + ; GFX942-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] + ; GFX942-NEXT: FLAT_ATOMIC_PK_ADD_F16 [[COPY3]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr) + ; GFX942-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.ll index 9b9249b62b0bc..00c44c27257bb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.ll @@ -1,6 +1,6 @@ ; RUN: opt -passes=amdgpu-attributor -mcpu=gfx900 < %s | llc -mcpu=gfx900 | FileCheck -check-prefixes=GCN,RW-FLAT %s ; RUN: opt -passes=amdgpu-attributor -mcpu=gfx900 -mattr=+architected-flat-scratch < %s | llc | FileCheck -check-prefixes=GCN,RO-FLAT %s -; RUN: opt -passes=amdgpu-attributor -mcpu=gfx940 < %s | llc | FileCheck -check-prefixes=GCN,RO-FLAT %s +; RUN: opt -passes=amdgpu-attributor -mcpu=gfx942 < %s | llc | FileCheck -check-prefixes=GCN,RO-FLAT %s target triple = "amdgcn-amd-amdhsa" diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll index 7cafa2f608a4b..38346dd568694 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll @@ -1,13 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -global-isel -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -global-isel -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -global-isel -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -global-isel -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=GFX942 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -global-isel -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -global-isel -mattr=-unaligned-access-mode -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck -check-prefixes=UNALIGNED_GFX9 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -global-isel -mattr=-unaligned-access-mode -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck -check-prefixes=UNALIGNED_GFX10 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -global-isel -mattr=-unaligned-access-mode -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=UNALIGNED_GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -global-isel -mattr=-unaligned-access-mode -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=UNALIGNED_GFX942 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -global-isel -mattr=-unaligned-access-mode -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=UNALIGNED_GFX11 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel -mattr=-unaligned-access-mode -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=UNALIGNED_GFX12 %s @@ -46,19 +46,19 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) { ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_endpgm ; -; GFX940-LABEL: store_load_sindex_kernel: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NEXT: v_mov_b32_e32 v0, 15 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_lshl_b32 s1, s0, 2 -; GFX940-NEXT: s_and_b32 s0, s0, 15 -; GFX940-NEXT: s_lshl_b32 s0, s0, 2 -; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: store_load_sindex_kernel: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NEXT: v_mov_b32_e32 v0, 15 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_lshl_b32 s1, s0, 2 +; GFX942-NEXT: s_and_b32 s0, s0, 15 +; GFX942-NEXT: s_lshl_b32 s0, s0, 2 +; GFX942-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_endpgm ; ; GFX11-LABEL: store_load_sindex_kernel: ; GFX11: ; %bb.0: ; %bb @@ -122,19 +122,19 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) { ; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX10-NEXT: s_endpgm ; -; UNALIGNED_GFX940-LABEL: store_load_sindex_kernel: -; UNALIGNED_GFX940: ; %bb.0: ; %bb -; UNALIGNED_GFX940-NEXT: s_load_dword s0, s[4:5], 0x0 -; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v0, 15 -; UNALIGNED_GFX940-NEXT: s_waitcnt lgkmcnt(0) -; UNALIGNED_GFX940-NEXT: s_lshl_b32 s1, s0, 2 -; UNALIGNED_GFX940-NEXT: s_and_b32 s0, s0, 15 -; UNALIGNED_GFX940-NEXT: s_lshl_b32 s0, s0, 2 -; UNALIGNED_GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: s_endpgm +; UNALIGNED_GFX942-LABEL: store_load_sindex_kernel: +; UNALIGNED_GFX942: ; %bb.0: ; %bb +; UNALIGNED_GFX942-NEXT: s_load_dword s0, s[4:5], 0x0 +; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 15 +; UNALIGNED_GFX942-NEXT: s_waitcnt lgkmcnt(0) +; UNALIGNED_GFX942-NEXT: s_lshl_b32 s1, s0, 2 +; UNALIGNED_GFX942-NEXT: s_and_b32 s0, s0, 15 +; UNALIGNED_GFX942-NEXT: s_lshl_b32 s0, s0, 2 +; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: s_endpgm ; ; UNALIGNED_GFX11-LABEL: store_load_sindex_kernel: ; UNALIGNED_GFX11: ; %bb.0: ; %bb @@ -204,18 +204,18 @@ define amdgpu_kernel void @store_load_vindex_kernel() { ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_endpgm ; -; GFX940-LABEL: store_load_vindex_kernel: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, 15 -; GFX940-NEXT: v_sub_u32_e32 v0, 0, v0 -; GFX940-NEXT: scratch_store_dword v1, v2, off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX940-NEXT: scratch_load_dword v0, v0, off offset:124 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: store_load_vindex_kernel: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, 15 +; GFX942-NEXT: v_sub_u32_e32 v0, 0, v0 +; GFX942-NEXT: scratch_store_dword v1, v2, off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX942-NEXT: scratch_load_dword v0, v0, off offset:124 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_endpgm ; ; GFX11-LABEL: store_load_vindex_kernel: ; GFX11: ; %bb.0: ; %bb @@ -274,18 +274,18 @@ define amdgpu_kernel void @store_load_vindex_kernel() { ; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX10-NEXT: s_endpgm ; -; UNALIGNED_GFX940-LABEL: store_load_vindex_kernel: -; UNALIGNED_GFX940: ; %bb.0: ; %bb -; UNALIGNED_GFX940-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; UNALIGNED_GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0 -; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v2, 15 -; UNALIGNED_GFX940-NEXT: v_sub_u32_e32 v0, 0, v0 -; UNALIGNED_GFX940-NEXT: scratch_store_dword v1, v2, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; UNALIGNED_GFX940-NEXT: scratch_load_dword v0, v0, off offset:124 sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: s_endpgm +; UNALIGNED_GFX942-LABEL: store_load_vindex_kernel: +; UNALIGNED_GFX942: ; %bb.0: ; %bb +; UNALIGNED_GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; UNALIGNED_GFX942-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v2, 15 +; UNALIGNED_GFX942-NEXT: v_sub_u32_e32 v0, 0, v0 +; UNALIGNED_GFX942-NEXT: scratch_store_dword v1, v2, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; UNALIGNED_GFX942-NEXT: scratch_load_dword v0, v0, off offset:124 sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: s_endpgm ; ; UNALIGNED_GFX11-LABEL: store_load_vindex_kernel: ; UNALIGNED_GFX11: ; %bb.0: ; %bb @@ -356,19 +356,19 @@ define void @store_load_vindex_foo(i32 %idx) { ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: store_load_vindex_foo: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0 -; GFX940-NEXT: v_add_u32_e32 v1, s32, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, 15 -; GFX940-NEXT: v_and_b32_e32 v0, 15, v0 -; GFX940-NEXT: scratch_store_dword v1, v2, off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX940-NEXT: scratch_load_dword v0, v0, s32 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: store_load_vindex_foo: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; GFX942-NEXT: v_add_u32_e32 v1, s32, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, 15 +; GFX942-NEXT: v_and_b32_e32 v0, 15, v0 +; GFX942-NEXT: scratch_store_dword v1, v2, off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX942-NEXT: scratch_load_dword v0, v0, s32 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: store_load_vindex_foo: ; GFX11: ; %bb.0: ; %bb @@ -432,19 +432,19 @@ define void @store_load_vindex_foo(i32 %idx) { ; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX10-NEXT: s_setpc_b64 s[30:31] ; -; UNALIGNED_GFX940-LABEL: store_load_vindex_foo: -; UNALIGNED_GFX940: ; %bb.0: ; %bb -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; UNALIGNED_GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0 -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v1, s32, v1 -; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v2, 15 -; UNALIGNED_GFX940-NEXT: v_and_b32_e32 v0, 15, v0 -; UNALIGNED_GFX940-NEXT: scratch_store_dword v1, v2, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; UNALIGNED_GFX940-NEXT: scratch_load_dword v0, v0, s32 sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: s_setpc_b64 s[30:31] +; UNALIGNED_GFX942-LABEL: store_load_vindex_foo: +; UNALIGNED_GFX942: ; %bb.0: ; %bb +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; UNALIGNED_GFX942-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v1, s32, v1 +; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v2, 15 +; UNALIGNED_GFX942-NEXT: v_and_b32_e32 v0, 15, v0 +; UNALIGNED_GFX942-NEXT: scratch_store_dword v1, v2, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; UNALIGNED_GFX942-NEXT: scratch_load_dword v0, v0, s32 sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: s_setpc_b64 s[30:31] ; ; UNALIGNED_GFX11-LABEL: store_load_vindex_foo: ; UNALIGNED_GFX11: ; %bb.0: ; %bb @@ -503,13 +503,13 @@ define void @private_ptr_foo(ptr addrspace(5) nocapture %arg) { ; GFX10-NEXT: scratch_store_dword v0, v1, off offset:4 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: private_ptr_foo: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, 0x41200000 -; GFX940-NEXT: scratch_store_dword v0, v1, off offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: private_ptr_foo: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, 0x41200000 +; GFX942-NEXT: scratch_store_dword v0, v1, off offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: private_ptr_foo: ; GFX11: ; %bb.0: @@ -544,13 +544,13 @@ define void @private_ptr_foo(ptr addrspace(5) nocapture %arg) { ; UNALIGNED_GFX10-NEXT: scratch_store_dword v0, v1, off offset:4 ; UNALIGNED_GFX10-NEXT: s_setpc_b64 s[30:31] ; -; UNALIGNED_GFX940-LABEL: private_ptr_foo: -; UNALIGNED_GFX940: ; %bb.0: -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v1, 0x41200000 -; UNALIGNED_GFX940-NEXT: scratch_store_dword v0, v1, off offset:4 sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: s_setpc_b64 s[30:31] +; UNALIGNED_GFX942-LABEL: private_ptr_foo: +; UNALIGNED_GFX942: ; %bb.0: +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v1, 0x41200000 +; UNALIGNED_GFX942-NEXT: scratch_store_dword v0, v1, off offset:4 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: s_setpc_b64 s[30:31] ; ; UNALIGNED_GFX11-LABEL: private_ptr_foo: ; UNALIGNED_GFX11: ; %bb.0: @@ -617,23 +617,23 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) { ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_endpgm ; -; GFX940-LABEL: store_load_sindex_small_offset_kernel: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NEXT: scratch_load_dword v0, off, off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 15 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_lshl_b32 s1, s0, 2 -; GFX940-NEXT: s_and_b32 s0, s0, 15 -; GFX940-NEXT: s_addk_i32 s1, 0x100 -; GFX940-NEXT: s_lshl_b32 s0, s0, 2 -; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_addk_i32 s0, 0x100 -; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: store_load_sindex_small_offset_kernel: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NEXT: scratch_load_dword v0, off, off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 15 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_lshl_b32 s1, s0, 2 +; GFX942-NEXT: s_and_b32 s0, s0, 15 +; GFX942-NEXT: s_addk_i32 s1, 0x100 +; GFX942-NEXT: s_lshl_b32 s0, s0, 2 +; GFX942-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_addk_i32 s0, 0x100 +; GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_endpgm ; ; GFX11-LABEL: store_load_sindex_small_offset_kernel: ; GFX11: ; %bb.0: ; %bb @@ -713,23 +713,23 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) { ; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX10-NEXT: s_endpgm ; -; UNALIGNED_GFX940-LABEL: store_load_sindex_small_offset_kernel: -; UNALIGNED_GFX940: ; %bb.0: ; %bb -; UNALIGNED_GFX940-NEXT: s_load_dword s0, s[4:5], 0x0 -; UNALIGNED_GFX940-NEXT: scratch_load_dword v0, off, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v0, 15 -; UNALIGNED_GFX940-NEXT: s_waitcnt lgkmcnt(0) -; UNALIGNED_GFX940-NEXT: s_lshl_b32 s1, s0, 2 -; UNALIGNED_GFX940-NEXT: s_and_b32 s0, s0, 15 -; UNALIGNED_GFX940-NEXT: s_addk_i32 s1, 0x100 -; UNALIGNED_GFX940-NEXT: s_lshl_b32 s0, s0, 2 -; UNALIGNED_GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: s_addk_i32 s0, 0x100 -; UNALIGNED_GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: s_endpgm +; UNALIGNED_GFX942-LABEL: store_load_sindex_small_offset_kernel: +; UNALIGNED_GFX942: ; %bb.0: ; %bb +; UNALIGNED_GFX942-NEXT: s_load_dword s0, s[4:5], 0x0 +; UNALIGNED_GFX942-NEXT: scratch_load_dword v0, off, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 15 +; UNALIGNED_GFX942-NEXT: s_waitcnt lgkmcnt(0) +; UNALIGNED_GFX942-NEXT: s_lshl_b32 s1, s0, 2 +; UNALIGNED_GFX942-NEXT: s_and_b32 s0, s0, 15 +; UNALIGNED_GFX942-NEXT: s_addk_i32 s1, 0x100 +; UNALIGNED_GFX942-NEXT: s_lshl_b32 s0, s0, 2 +; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: s_addk_i32 s0, 0x100 +; UNALIGNED_GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: s_endpgm ; ; UNALIGNED_GFX11-LABEL: store_load_sindex_small_offset_kernel: ; UNALIGNED_GFX11: ; %bb.0: ; %bb @@ -819,21 +819,21 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel() { ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_endpgm ; -; GFX940-LABEL: store_load_vindex_small_offset_kernel: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: scratch_load_dword v1, off, off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0 -; GFX940-NEXT: v_sub_u32_e32 v0, 0, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, 15 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX940-NEXT: scratch_store_dword v1, v2, off offset:256 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_add_u32_e32 v0, 0x100, v0 -; GFX940-NEXT: scratch_load_dword v0, v0, off offset:124 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: store_load_vindex_small_offset_kernel: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: scratch_load_dword v1, off, off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; GFX942-NEXT: v_sub_u32_e32 v0, 0, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, 15 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX942-NEXT: scratch_store_dword v1, v2, off offset:256 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_add_u32_e32 v0, 0x100, v0 +; GFX942-NEXT: scratch_load_dword v0, v0, off offset:124 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_endpgm ; ; GFX11-LABEL: store_load_vindex_small_offset_kernel: ; GFX11: ; %bb.0: ; %bb @@ -906,21 +906,21 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel() { ; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX10-NEXT: s_endpgm ; -; UNALIGNED_GFX940-LABEL: store_load_vindex_small_offset_kernel: -; UNALIGNED_GFX940: ; %bb.0: ; %bb -; UNALIGNED_GFX940-NEXT: scratch_load_dword v1, off, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; UNALIGNED_GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0 -; UNALIGNED_GFX940-NEXT: v_sub_u32_e32 v0, 0, v0 -; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v2, 15 -; UNALIGNED_GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; UNALIGNED_GFX940-NEXT: scratch_store_dword v1, v2, off offset:256 sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v0, 0x100, v0 -; UNALIGNED_GFX940-NEXT: scratch_load_dword v0, v0, off offset:124 sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: s_endpgm +; UNALIGNED_GFX942-LABEL: store_load_vindex_small_offset_kernel: +; UNALIGNED_GFX942: ; %bb.0: ; %bb +; UNALIGNED_GFX942-NEXT: scratch_load_dword v1, off, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; UNALIGNED_GFX942-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; UNALIGNED_GFX942-NEXT: v_sub_u32_e32 v0, 0, v0 +; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v2, 15 +; UNALIGNED_GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; UNALIGNED_GFX942-NEXT: scratch_store_dword v1, v2, off offset:256 sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v0, 0x100, v0 +; UNALIGNED_GFX942-NEXT: scratch_load_dword v0, v0, off offset:124 sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: s_endpgm ; ; UNALIGNED_GFX11-LABEL: store_load_vindex_small_offset_kernel: ; UNALIGNED_GFX11: ; %bb.0: ; %bb @@ -1007,22 +1007,22 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) { ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: store_load_vindex_small_offset_foo: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: scratch_load_dword v1, off, s32 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0 -; GFX940-NEXT: v_add_u32_e32 v1, s32, v1 -; GFX940-NEXT: v_add_u32_e32 v1, 0x100, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, 15 -; GFX940-NEXT: v_and_b32_e32 v0, 15, v0 -; GFX940-NEXT: scratch_store_dword v1, v2, off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX940-NEXT: scratch_load_dword v0, v0, s32 offset:256 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: store_load_vindex_small_offset_foo: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: scratch_load_dword v1, off, s32 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; GFX942-NEXT: v_add_u32_e32 v1, s32, v1 +; GFX942-NEXT: v_add_u32_e32 v1, 0x100, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, 15 +; GFX942-NEXT: v_and_b32_e32 v0, 15, v0 +; GFX942-NEXT: scratch_store_dword v1, v2, off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX942-NEXT: scratch_load_dword v0, v0, s32 offset:256 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: store_load_vindex_small_offset_foo: ; GFX11: ; %bb.0: ; %bb @@ -1098,22 +1098,22 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) { ; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX10-NEXT: s_setpc_b64 s[30:31] ; -; UNALIGNED_GFX940-LABEL: store_load_vindex_small_offset_foo: -; UNALIGNED_GFX940: ; %bb.0: ; %bb -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_dword v1, off, s32 sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0 -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v1, s32, v1 -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v1, 0x100, v1 -; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v2, 15 -; UNALIGNED_GFX940-NEXT: v_and_b32_e32 v0, 15, v0 -; UNALIGNED_GFX940-NEXT: scratch_store_dword v1, v2, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; UNALIGNED_GFX940-NEXT: scratch_load_dword v0, v0, s32 offset:256 sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: s_setpc_b64 s[30:31] +; UNALIGNED_GFX942-LABEL: store_load_vindex_small_offset_foo: +; UNALIGNED_GFX942: ; %bb.0: ; %bb +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_dword v1, off, s32 sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v1, s32, v1 +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v1, 0x100, v1 +; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v2, 15 +; UNALIGNED_GFX942-NEXT: v_and_b32_e32 v0, 15, v0 +; UNALIGNED_GFX942-NEXT: scratch_store_dword v1, v2, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; UNALIGNED_GFX942-NEXT: scratch_load_dword v0, v0, s32 offset:256 sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: s_setpc_b64 s[30:31] ; ; UNALIGNED_GFX11-LABEL: store_load_vindex_small_offset_foo: ; UNALIGNED_GFX11: ; %bb.0: ; %bb @@ -1206,23 +1206,23 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) { ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_endpgm ; -; GFX940-LABEL: store_load_sindex_large_offset_kernel: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NEXT: scratch_load_dword v0, off, off offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 15 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_lshl_b32 s1, s0, 2 -; GFX940-NEXT: s_and_b32 s0, s0, 15 -; GFX940-NEXT: s_addk_i32 s1, 0x4004 -; GFX940-NEXT: s_lshl_b32 s0, s0, 2 -; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_addk_i32 s0, 0x4004 -; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: store_load_sindex_large_offset_kernel: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NEXT: scratch_load_dword v0, off, off offset:4 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 15 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_lshl_b32 s1, s0, 2 +; GFX942-NEXT: s_and_b32 s0, s0, 15 +; GFX942-NEXT: s_addk_i32 s1, 0x4004 +; GFX942-NEXT: s_lshl_b32 s0, s0, 2 +; GFX942-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_addk_i32 s0, 0x4004 +; GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_endpgm ; ; GFX11-LABEL: store_load_sindex_large_offset_kernel: ; GFX11: ; %bb.0: ; %bb @@ -1302,23 +1302,23 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) { ; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX10-NEXT: s_endpgm ; -; UNALIGNED_GFX940-LABEL: store_load_sindex_large_offset_kernel: -; UNALIGNED_GFX940: ; %bb.0: ; %bb -; UNALIGNED_GFX940-NEXT: s_load_dword s0, s[4:5], 0x0 -; UNALIGNED_GFX940-NEXT: scratch_load_dword v0, off, off offset:4 sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v0, 15 -; UNALIGNED_GFX940-NEXT: s_waitcnt lgkmcnt(0) -; UNALIGNED_GFX940-NEXT: s_lshl_b32 s1, s0, 2 -; UNALIGNED_GFX940-NEXT: s_and_b32 s0, s0, 15 -; UNALIGNED_GFX940-NEXT: s_addk_i32 s1, 0x4004 -; UNALIGNED_GFX940-NEXT: s_lshl_b32 s0, s0, 2 -; UNALIGNED_GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: s_addk_i32 s0, 0x4004 -; UNALIGNED_GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: s_endpgm +; UNALIGNED_GFX942-LABEL: store_load_sindex_large_offset_kernel: +; UNALIGNED_GFX942: ; %bb.0: ; %bb +; UNALIGNED_GFX942-NEXT: s_load_dword s0, s[4:5], 0x0 +; UNALIGNED_GFX942-NEXT: scratch_load_dword v0, off, off offset:4 sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 15 +; UNALIGNED_GFX942-NEXT: s_waitcnt lgkmcnt(0) +; UNALIGNED_GFX942-NEXT: s_lshl_b32 s1, s0, 2 +; UNALIGNED_GFX942-NEXT: s_and_b32 s0, s0, 15 +; UNALIGNED_GFX942-NEXT: s_addk_i32 s1, 0x4004 +; UNALIGNED_GFX942-NEXT: s_lshl_b32 s0, s0, 2 +; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: s_addk_i32 s0, 0x4004 +; UNALIGNED_GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: s_endpgm ; ; UNALIGNED_GFX11-LABEL: store_load_sindex_large_offset_kernel: ; UNALIGNED_GFX11: ; %bb.0: ; %bb @@ -1408,22 +1408,22 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() { ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_endpgm ; -; GFX940-LABEL: store_load_vindex_large_offset_kernel: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: scratch_load_dword v1, off, off offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0 -; GFX940-NEXT: v_sub_u32_e32 v0, 0, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, 15 -; GFX940-NEXT: s_movk_i32 s0, 0x4004 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX940-NEXT: scratch_store_dword v1, v2, s0 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_add_u32_e32 v0, 0x4004, v0 -; GFX940-NEXT: scratch_load_dword v0, v0, off offset:124 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: store_load_vindex_large_offset_kernel: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: scratch_load_dword v1, off, off offset:4 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; GFX942-NEXT: v_sub_u32_e32 v0, 0, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, 15 +; GFX942-NEXT: s_movk_i32 s0, 0x4004 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX942-NEXT: scratch_store_dword v1, v2, s0 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_add_u32_e32 v0, 0x4004, v0 +; GFX942-NEXT: scratch_load_dword v0, v0, off offset:124 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_endpgm ; ; GFX11-LABEL: store_load_vindex_large_offset_kernel: ; GFX11: ; %bb.0: ; %bb @@ -1497,22 +1497,22 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() { ; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX10-NEXT: s_endpgm ; -; UNALIGNED_GFX940-LABEL: store_load_vindex_large_offset_kernel: -; UNALIGNED_GFX940: ; %bb.0: ; %bb -; UNALIGNED_GFX940-NEXT: scratch_load_dword v1, off, off offset:4 sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; UNALIGNED_GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0 -; UNALIGNED_GFX940-NEXT: v_sub_u32_e32 v0, 0, v0 -; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v2, 15 -; UNALIGNED_GFX940-NEXT: s_movk_i32 s0, 0x4004 -; UNALIGNED_GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; UNALIGNED_GFX940-NEXT: scratch_store_dword v1, v2, s0 sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v0, 0x4004, v0 -; UNALIGNED_GFX940-NEXT: scratch_load_dword v0, v0, off offset:124 sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: s_endpgm +; UNALIGNED_GFX942-LABEL: store_load_vindex_large_offset_kernel: +; UNALIGNED_GFX942: ; %bb.0: ; %bb +; UNALIGNED_GFX942-NEXT: scratch_load_dword v1, off, off offset:4 sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; UNALIGNED_GFX942-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; UNALIGNED_GFX942-NEXT: v_sub_u32_e32 v0, 0, v0 +; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v2, 15 +; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x4004 +; UNALIGNED_GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; UNALIGNED_GFX942-NEXT: scratch_store_dword v1, v2, s0 sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v0, 0x4004, v0 +; UNALIGNED_GFX942-NEXT: scratch_load_dword v0, v0, off offset:124 sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: s_endpgm ; ; UNALIGNED_GFX11-LABEL: store_load_vindex_large_offset_kernel: ; UNALIGNED_GFX11: ; %bb.0: ; %bb @@ -1600,23 +1600,23 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) { ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: store_load_vindex_large_offset_foo: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: scratch_load_dword v1, off, s32 offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0 -; GFX940-NEXT: v_add_u32_e32 v1, s32, v1 -; GFX940-NEXT: v_add_u32_e32 v1, 0x4004, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, 15 -; GFX940-NEXT: v_and_b32_e32 v0, 15, v0 -; GFX940-NEXT: scratch_store_dword v1, v2, off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX940-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX940-NEXT: scratch_load_dword v0, v0, s0 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: store_load_vindex_large_offset_foo: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: scratch_load_dword v1, off, s32 offset:4 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; GFX942-NEXT: v_add_u32_e32 v1, s32, v1 +; GFX942-NEXT: v_add_u32_e32 v1, 0x4004, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, 15 +; GFX942-NEXT: v_and_b32_e32 v0, 15, v0 +; GFX942-NEXT: scratch_store_dword v1, v2, off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX942-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX942-NEXT: scratch_load_dword v0, v0, s0 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: store_load_vindex_large_offset_foo: ; GFX11: ; %bb.0: ; %bb @@ -1693,23 +1693,23 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) { ; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX10-NEXT: s_setpc_b64 s[30:31] ; -; UNALIGNED_GFX940-LABEL: store_load_vindex_large_offset_foo: -; UNALIGNED_GFX940: ; %bb.0: ; %bb -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_dword v1, off, s32 offset:4 sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0 -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v1, s32, v1 -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v1, 0x4004, v1 -; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v2, 15 -; UNALIGNED_GFX940-NEXT: v_and_b32_e32 v0, 15, v0 -; UNALIGNED_GFX940-NEXT: scratch_store_dword v1, v2, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; UNALIGNED_GFX940-NEXT: s_add_i32 s0, s32, 0x4004 -; UNALIGNED_GFX940-NEXT: scratch_load_dword v0, v0, s0 sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: s_setpc_b64 s[30:31] +; UNALIGNED_GFX942-LABEL: store_load_vindex_large_offset_foo: +; UNALIGNED_GFX942: ; %bb.0: ; %bb +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_dword v1, off, s32 offset:4 sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v1, s32, v1 +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v1, 0x4004, v1 +; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v2, 15 +; UNALIGNED_GFX942-NEXT: v_and_b32_e32 v0, 15, v0 +; UNALIGNED_GFX942-NEXT: scratch_store_dword v1, v2, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s32, 0x4004 +; UNALIGNED_GFX942-NEXT: scratch_load_dword v0, v0, s0 sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: s_setpc_b64 s[30:31] ; ; UNALIGNED_GFX11-LABEL: store_load_vindex_large_offset_foo: ; UNALIGNED_GFX11: ; %bb.0: ; %bb @@ -1796,19 +1796,19 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() { ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_endpgm ; -; GFX940-LABEL: store_load_large_imm_offset_kernel: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: v_mov_b32_e32 v0, 13 -; GFX940-NEXT: s_movk_i32 s0, 0x3e80 -; GFX940-NEXT: scratch_store_dword off, v0, off offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 15 -; GFX940-NEXT: s_add_i32 s0, s0, 4 -; GFX940-NEXT: scratch_store_dword off, v0, s0 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: store_load_large_imm_offset_kernel: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: v_mov_b32_e32 v0, 13 +; GFX942-NEXT: s_movk_i32 s0, 0x3e80 +; GFX942-NEXT: scratch_store_dword off, v0, off offset:4 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 15 +; GFX942-NEXT: s_add_i32 s0, s0, 4 +; GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_endpgm ; ; GFX11-LABEL: store_load_large_imm_offset_kernel: ; GFX11: ; %bb.0: ; %bb @@ -1870,19 +1870,19 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() { ; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX10-NEXT: s_endpgm ; -; UNALIGNED_GFX940-LABEL: store_load_large_imm_offset_kernel: -; UNALIGNED_GFX940: ; %bb.0: ; %bb -; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v0, 13 -; UNALIGNED_GFX940-NEXT: s_movk_i32 s0, 0x3e80 -; UNALIGNED_GFX940-NEXT: scratch_store_dword off, v0, off offset:4 sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v0, 15 -; UNALIGNED_GFX940-NEXT: s_add_i32 s0, s0, 4 -; UNALIGNED_GFX940-NEXT: scratch_store_dword off, v0, s0 sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: s_endpgm +; UNALIGNED_GFX942-LABEL: store_load_large_imm_offset_kernel: +; UNALIGNED_GFX942: ; %bb.0: ; %bb +; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 13 +; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x3e80 +; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, off offset:4 sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 15 +; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s0, 4 +; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: s_endpgm ; ; UNALIGNED_GFX11-LABEL: store_load_large_imm_offset_kernel: ; UNALIGNED_GFX11: ; %bb.0: ; %bb @@ -1952,21 +1952,21 @@ define void @store_load_large_imm_offset_foo() { ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: store_load_large_imm_offset_foo: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0x3e80 -; GFX940-NEXT: v_mov_b32_e32 v0, 13 -; GFX940-NEXT: s_add_i32 s1, s32, s0 -; GFX940-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 15 -; GFX940-NEXT: s_add_i32 s0, s1, 4 -; GFX940-NEXT: scratch_store_dword off, v0, s0 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: store_load_large_imm_offset_foo: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0x3e80 +; GFX942-NEXT: v_mov_b32_e32 v0, 13 +; GFX942-NEXT: s_add_i32 s1, s32, s0 +; GFX942-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 15 +; GFX942-NEXT: s_add_i32 s0, s1, 4 +; GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: store_load_large_imm_offset_foo: ; GFX11: ; %bb.0: ; %bb @@ -2033,21 +2033,21 @@ define void @store_load_large_imm_offset_foo() { ; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX10-NEXT: s_setpc_b64 s[30:31] ; -; UNALIGNED_GFX940-LABEL: store_load_large_imm_offset_foo: -; UNALIGNED_GFX940: ; %bb.0: ; %bb -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; UNALIGNED_GFX940-NEXT: s_movk_i32 s0, 0x3e80 -; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v0, 13 -; UNALIGNED_GFX940-NEXT: s_add_i32 s1, s32, s0 -; UNALIGNED_GFX940-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v0, 15 -; UNALIGNED_GFX940-NEXT: s_add_i32 s0, s1, 4 -; UNALIGNED_GFX940-NEXT: scratch_store_dword off, v0, s0 sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: s_setpc_b64 s[30:31] +; UNALIGNED_GFX942-LABEL: store_load_large_imm_offset_foo: +; UNALIGNED_GFX942: ; %bb.0: ; %bb +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x3e80 +; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 13 +; UNALIGNED_GFX942-NEXT: s_add_i32 s1, s32, s0 +; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 15 +; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s1, 4 +; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: s_setpc_b64 s[30:31] ; ; UNALIGNED_GFX11-LABEL: store_load_large_imm_offset_foo: ; UNALIGNED_GFX11: ; %bb.0: ; %bb @@ -2123,18 +2123,18 @@ define amdgpu_kernel void @store_load_vidx_sidx_offset(i32 %sidx) { ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_endpgm ; -; GFX940-LABEL: store_load_vidx_sidx_offset: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-NEXT: v_mov_b32_e32 v1, 15 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_add_lshl_u32 v0, s0, v0, 2 -; GFX940-NEXT: scratch_store_dword v0, v1, off offset:1024 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: scratch_load_dword v0, v0, off offset:1024 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: store_load_vidx_sidx_offset: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-NEXT: v_mov_b32_e32 v1, 15 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_add_lshl_u32 v0, s0, v0, 2 +; GFX942-NEXT: scratch_store_dword v0, v1, off offset:1024 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: scratch_load_dword v0, v0, off offset:1024 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_endpgm ; ; GFX11-LABEL: store_load_vidx_sidx_offset: ; GFX11: ; %bb.0: ; %bb @@ -2192,18 +2192,18 @@ define amdgpu_kernel void @store_load_vidx_sidx_offset(i32 %sidx) { ; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX10-NEXT: s_endpgm ; -; UNALIGNED_GFX940-LABEL: store_load_vidx_sidx_offset: -; UNALIGNED_GFX940: ; %bb.0: ; %bb -; UNALIGNED_GFX940-NEXT: s_load_dword s0, s[4:5], 0x0 -; UNALIGNED_GFX940-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v1, 15 -; UNALIGNED_GFX940-NEXT: s_waitcnt lgkmcnt(0) -; UNALIGNED_GFX940-NEXT: v_add_lshl_u32 v0, s0, v0, 2 -; UNALIGNED_GFX940-NEXT: scratch_store_dword v0, v1, off offset:1024 sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_dword v0, v0, off offset:1024 sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: s_endpgm +; UNALIGNED_GFX942-LABEL: store_load_vidx_sidx_offset: +; UNALIGNED_GFX942: ; %bb.0: ; %bb +; UNALIGNED_GFX942-NEXT: s_load_dword s0, s[4:5], 0x0 +; UNALIGNED_GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v1, 15 +; UNALIGNED_GFX942-NEXT: s_waitcnt lgkmcnt(0) +; UNALIGNED_GFX942-NEXT: v_add_lshl_u32 v0, s0, v0, 2 +; UNALIGNED_GFX942-NEXT: scratch_store_dword v0, v1, off offset:1024 sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_dword v0, v0, off offset:1024 sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: s_endpgm ; ; UNALIGNED_GFX11-LABEL: store_load_vidx_sidx_offset: ; UNALIGNED_GFX11: ; %bb.0: ; %bb @@ -2264,15 +2264,15 @@ define void @store_load_i64_aligned(ptr addrspace(5) nocapture %arg) { ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: store_load_i64_aligned: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 15 -; GFX940-NEXT: scratch_store_dwordx2 v0, v[2:3], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: scratch_load_dwordx2 v[0:1], v0, off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: store_load_i64_aligned: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 15 +; GFX942-NEXT: scratch_store_dwordx2 v0, v[2:3], off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: scratch_load_dwordx2 v[0:1], v0, off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: store_load_i64_aligned: ; GFX11: ; %bb.0: ; %bb @@ -2323,15 +2323,15 @@ define void @store_load_i64_aligned(ptr addrspace(5) nocapture %arg) { ; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX10-NEXT: s_setpc_b64 s[30:31] ; -; UNALIGNED_GFX940-LABEL: store_load_i64_aligned: -; UNALIGNED_GFX940: ; %bb.0: ; %bb -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; UNALIGNED_GFX940-NEXT: v_mov_b64_e32 v[2:3], 15 -; UNALIGNED_GFX940-NEXT: scratch_store_dwordx2 v0, v[2:3], off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_dwordx2 v[0:1], v0, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: s_setpc_b64 s[30:31] +; UNALIGNED_GFX942-LABEL: store_load_i64_aligned: +; UNALIGNED_GFX942: ; %bb.0: ; %bb +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; UNALIGNED_GFX942-NEXT: v_mov_b64_e32 v[2:3], 15 +; UNALIGNED_GFX942-NEXT: scratch_store_dwordx2 v0, v[2:3], off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_dwordx2 v[0:1], v0, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: s_setpc_b64 s[30:31] ; ; UNALIGNED_GFX11-LABEL: store_load_i64_aligned: ; UNALIGNED_GFX11: ; %bb.0: ; %bb @@ -2388,15 +2388,15 @@ define void @store_load_i64_unaligned(ptr addrspace(5) nocapture %arg) { ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: store_load_i64_unaligned: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 15 -; GFX940-NEXT: scratch_store_dwordx2 v0, v[2:3], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: scratch_load_dwordx2 v[0:1], v0, off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: store_load_i64_unaligned: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 15 +; GFX942-NEXT: scratch_store_dwordx2 v0, v[2:3], off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: scratch_load_dwordx2 v[0:1], v0, off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: store_load_i64_unaligned: ; GFX11: ; %bb.0: ; %bb @@ -2525,59 +2525,59 @@ define void @store_load_i64_unaligned(ptr addrspace(5) nocapture %arg) { ; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX10-NEXT: s_setpc_b64 s[30:31] ; -; UNALIGNED_GFX940-LABEL: store_load_i64_unaligned: -; UNALIGNED_GFX940: ; %bb.0: ; %bb -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v4, 15 -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v1, 4, v0 -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v2, 2, v0 -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v3, 1, v0 -; UNALIGNED_GFX940-NEXT: scratch_store_byte v0, v4, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v4, 0 -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v6, 6, v0 -; UNALIGNED_GFX940-NEXT: scratch_store_byte v3, v4, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v5, 3, v0 -; UNALIGNED_GFX940-NEXT: scratch_store_byte v2, v4, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_store_byte v5, v4, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v7, 5, v0 -; UNALIGNED_GFX940-NEXT: scratch_store_byte v1, v4, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_store_byte v7, v4, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v8, 7, v0 -; UNALIGNED_GFX940-NEXT: scratch_store_byte v6, v4, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_store_byte v8, v4, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v4, v0, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr7 -; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr2 -; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr6 -; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr1 -; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr3 -; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr5 -; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr8 -; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr0 -; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v4, v3, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v4, v2, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v4, v5, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v4, v1, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v4, v7, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v4, v6, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v4, v8, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: s_setpc_b64 s[30:31] +; UNALIGNED_GFX942-LABEL: store_load_i64_unaligned: +; UNALIGNED_GFX942: ; %bb.0: ; %bb +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v4, 15 +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v1, 4, v0 +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v2, 2, v0 +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v3, 1, v0 +; UNALIGNED_GFX942-NEXT: scratch_store_byte v0, v4, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v4, 0 +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v6, 6, v0 +; UNALIGNED_GFX942-NEXT: scratch_store_byte v3, v4, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v5, 3, v0 +; UNALIGNED_GFX942-NEXT: scratch_store_byte v2, v4, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_store_byte v5, v4, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v7, 5, v0 +; UNALIGNED_GFX942-NEXT: scratch_store_byte v1, v4, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_store_byte v7, v4, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v8, 7, v0 +; UNALIGNED_GFX942-NEXT: scratch_store_byte v6, v4, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_store_byte v8, v4, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_ubyte v4, v0, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: ; kill: killed $vgpr7 +; UNALIGNED_GFX942-NEXT: ; kill: killed $vgpr2 +; UNALIGNED_GFX942-NEXT: ; kill: killed $vgpr6 +; UNALIGNED_GFX942-NEXT: ; kill: killed $vgpr1 +; UNALIGNED_GFX942-NEXT: ; kill: killed $vgpr3 +; UNALIGNED_GFX942-NEXT: ; kill: killed $vgpr5 +; UNALIGNED_GFX942-NEXT: ; kill: killed $vgpr8 +; UNALIGNED_GFX942-NEXT: ; kill: killed $vgpr0 +; UNALIGNED_GFX942-NEXT: scratch_load_ubyte v4, v3, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_ubyte v4, v2, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_ubyte v4, v5, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_ubyte v4, v1, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_ubyte v4, v7, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_ubyte v4, v6, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_ubyte v4, v8, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: s_setpc_b64 s[30:31] ; ; UNALIGNED_GFX11-LABEL: store_load_i64_unaligned: ; UNALIGNED_GFX11: ; %bb.0: ; %bb @@ -2703,20 +2703,20 @@ define void @store_load_v3i32_unaligned(ptr addrspace(5) nocapture %arg) { ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: store_load_v3i32_unaligned: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 3 -; GFX940-NEXT: s_mov_b32 s1, 2 -; GFX940-NEXT: s_mov_b32 s0, 1 -; GFX940-NEXT: v_mov_b32_e32 v4, s2 -; GFX940-NEXT: v_mov_b32_e32 v3, s1 -; GFX940-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NEXT: scratch_store_dwordx3 v0, v[2:4], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: scratch_load_dwordx3 v[0:2], v0, off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: store_load_v3i32_unaligned: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 3 +; GFX942-NEXT: s_mov_b32 s1, 2 +; GFX942-NEXT: s_mov_b32 s0, 1 +; GFX942-NEXT: v_mov_b32_e32 v4, s2 +; GFX942-NEXT: v_mov_b32_e32 v3, s1 +; GFX942-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NEXT: scratch_store_dwordx3 v0, v[2:4], off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: scratch_load_dwordx3 v[0:2], v0, off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: store_load_v3i32_unaligned: ; GFX11: ; %bb.0: ; %bb @@ -2900,85 +2900,85 @@ define void @store_load_v3i32_unaligned(ptr addrspace(5) nocapture %arg) { ; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX10-NEXT: s_setpc_b64 s[30:31] ; -; UNALIGNED_GFX940-LABEL: store_load_v3i32_unaligned: -; UNALIGNED_GFX940: ; %bb.0: ; %bb -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v3, 1 -; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v1, 2 -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v2, 2, v0 -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v4, 1, v0 -; UNALIGNED_GFX940-NEXT: scratch_store_byte v0, v3, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v3, 0 -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v6, 4, v0 -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v7, 6, v0 -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v9, 8, v0 -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v10, 10, v0 -; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v12, 3 -; UNALIGNED_GFX940-NEXT: scratch_store_byte v4, v3, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v5, 3, v0 -; UNALIGNED_GFX940-NEXT: scratch_store_byte v2, v3, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_store_byte v5, v3, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v8, 5, v0 -; UNALIGNED_GFX940-NEXT: scratch_store_byte v6, v1, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_store_byte v8, v3, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v1, 7, v0 -; UNALIGNED_GFX940-NEXT: scratch_store_byte v7, v3, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_store_byte v1, v3, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v11, 9, v0 -; UNALIGNED_GFX940-NEXT: scratch_store_byte v9, v12, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_store_byte v11, v3, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v12, 11, v0 -; UNALIGNED_GFX940-NEXT: scratch_store_byte v10, v3, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_store_byte v12, v3, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v0, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr12 -; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr4 -; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr11 -; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr7 -; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr6 -; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr10 -; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr5 -; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr9 -; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr1 -; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr8 -; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr2 -; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr0 -; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v4, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v2, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v5, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v6, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v8, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v7, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v1, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v9, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v11, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v10, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v12, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: s_setpc_b64 s[30:31] +; UNALIGNED_GFX942-LABEL: store_load_v3i32_unaligned: +; UNALIGNED_GFX942: ; %bb.0: ; %bb +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v3, 1 +; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v1, 2 +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v2, 2, v0 +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v4, 1, v0 +; UNALIGNED_GFX942-NEXT: scratch_store_byte v0, v3, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v3, 0 +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v6, 4, v0 +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v7, 6, v0 +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v9, 8, v0 +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v10, 10, v0 +; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v12, 3 +; UNALIGNED_GFX942-NEXT: scratch_store_byte v4, v3, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v5, 3, v0 +; UNALIGNED_GFX942-NEXT: scratch_store_byte v2, v3, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_store_byte v5, v3, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v8, 5, v0 +; UNALIGNED_GFX942-NEXT: scratch_store_byte v6, v1, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_store_byte v8, v3, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v1, 7, v0 +; UNALIGNED_GFX942-NEXT: scratch_store_byte v7, v3, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_store_byte v1, v3, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v11, 9, v0 +; UNALIGNED_GFX942-NEXT: scratch_store_byte v9, v12, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_store_byte v11, v3, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v12, 11, v0 +; UNALIGNED_GFX942-NEXT: scratch_store_byte v10, v3, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_store_byte v12, v3, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_ubyte v3, v0, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: ; kill: killed $vgpr12 +; UNALIGNED_GFX942-NEXT: ; kill: killed $vgpr4 +; UNALIGNED_GFX942-NEXT: ; kill: killed $vgpr11 +; UNALIGNED_GFX942-NEXT: ; kill: killed $vgpr7 +; UNALIGNED_GFX942-NEXT: ; kill: killed $vgpr6 +; UNALIGNED_GFX942-NEXT: ; kill: killed $vgpr10 +; UNALIGNED_GFX942-NEXT: ; kill: killed $vgpr5 +; UNALIGNED_GFX942-NEXT: ; kill: killed $vgpr9 +; UNALIGNED_GFX942-NEXT: ; kill: killed $vgpr1 +; UNALIGNED_GFX942-NEXT: ; kill: killed $vgpr8 +; UNALIGNED_GFX942-NEXT: ; kill: killed $vgpr2 +; UNALIGNED_GFX942-NEXT: ; kill: killed $vgpr0 +; UNALIGNED_GFX942-NEXT: scratch_load_ubyte v3, v4, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_ubyte v3, v2, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_ubyte v3, v5, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_ubyte v3, v6, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_ubyte v3, v8, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_ubyte v3, v7, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_ubyte v3, v1, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_ubyte v3, v9, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_ubyte v3, v11, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_ubyte v3, v10, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_ubyte v3, v12, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: s_setpc_b64 s[30:31] ; ; UNALIGNED_GFX11-LABEL: store_load_v3i32_unaligned: ; UNALIGNED_GFX11: ; %bb.0: ; %bb @@ -3146,20 +3146,20 @@ define void @store_load_v4i32_unaligned(ptr addrspace(5) nocapture %arg) { ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: store_load_v4i32_unaligned: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s3, 4 -; GFX940-NEXT: s_mov_b32 s2, 3 -; GFX940-NEXT: s_mov_b32 s1, 2 -; GFX940-NEXT: s_mov_b32 s0, 1 -; GFX940-NEXT: v_mov_b64_e32 v[4:5], s[2:3] -; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[0:1] -; GFX940-NEXT: scratch_store_dwordx4 v0, v[2:5], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: scratch_load_dwordx4 v[0:3], v0, off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: store_load_v4i32_unaligned: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s3, 4 +; GFX942-NEXT: s_mov_b32 s2, 3 +; GFX942-NEXT: s_mov_b32 s1, 2 +; GFX942-NEXT: s_mov_b32 s0, 1 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], s[2:3] +; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[0:1] +; GFX942-NEXT: scratch_store_dwordx4 v0, v[2:5], off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: scratch_load_dwordx4 v[0:3], v0, off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: store_load_v4i32_unaligned: ; GFX11: ; %bb.0: ; %bb @@ -3390,109 +3390,109 @@ define void @store_load_v4i32_unaligned(ptr addrspace(5) nocapture %arg) { ; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX10-NEXT: s_setpc_b64 s[30:31] ; -; UNALIGNED_GFX940-LABEL: store_load_v4i32_unaligned: -; UNALIGNED_GFX940: ; %bb.0: ; %bb -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v3, 1 -; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v1, 2 -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v2, 2, v0 -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v4, 1, v0 -; UNALIGNED_GFX940-NEXT: scratch_store_byte v0, v3, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v3, 0 -; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v6, 4 -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v7, 4, v0 -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v8, 6, v0 -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v10, 8, v0 -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v11, 10, v0 -; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v13, 3 -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v14, 12, v0 -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v15, 14, v0 -; UNALIGNED_GFX940-NEXT: scratch_store_byte v4, v3, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v5, 3, v0 -; UNALIGNED_GFX940-NEXT: scratch_store_byte v2, v3, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_store_byte v5, v3, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v9, 5, v0 -; UNALIGNED_GFX940-NEXT: scratch_store_byte v7, v1, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_store_byte v9, v3, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v1, 7, v0 -; UNALIGNED_GFX940-NEXT: scratch_store_byte v8, v3, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_store_byte v1, v3, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v12, 9, v0 -; UNALIGNED_GFX940-NEXT: scratch_store_byte v10, v13, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_store_byte v12, v3, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v13, 11, v0 -; UNALIGNED_GFX940-NEXT: scratch_store_byte v11, v3, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_store_byte v13, v3, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v16, 13, v0 -; UNALIGNED_GFX940-NEXT: scratch_store_byte v14, v6, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_store_byte v16, v3, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v6, 15, v0 -; UNALIGNED_GFX940-NEXT: scratch_store_byte v15, v3, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_store_byte v6, v3, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v0, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v4, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v2, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v5, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v7, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v9, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v8, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v1, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v10, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v12, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v11, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v13, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v14, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v16, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v15, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr2 -; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr1 -; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr9 -; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr16 -; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr11 -; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr4 -; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr15 -; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr10 -; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr7 -; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr13 -; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr5 -; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr14 -; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr12 -; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr8 -; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr0 -; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v0, v6, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: s_setpc_b64 s[30:31] +; UNALIGNED_GFX942-LABEL: store_load_v4i32_unaligned: +; UNALIGNED_GFX942: ; %bb.0: ; %bb +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v3, 1 +; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v1, 2 +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v2, 2, v0 +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v4, 1, v0 +; UNALIGNED_GFX942-NEXT: scratch_store_byte v0, v3, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v3, 0 +; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v6, 4 +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v7, 4, v0 +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v8, 6, v0 +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v10, 8, v0 +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v11, 10, v0 +; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v13, 3 +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v14, 12, v0 +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v15, 14, v0 +; UNALIGNED_GFX942-NEXT: scratch_store_byte v4, v3, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v5, 3, v0 +; UNALIGNED_GFX942-NEXT: scratch_store_byte v2, v3, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_store_byte v5, v3, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v9, 5, v0 +; UNALIGNED_GFX942-NEXT: scratch_store_byte v7, v1, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_store_byte v9, v3, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v1, 7, v0 +; UNALIGNED_GFX942-NEXT: scratch_store_byte v8, v3, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_store_byte v1, v3, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v12, 9, v0 +; UNALIGNED_GFX942-NEXT: scratch_store_byte v10, v13, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_store_byte v12, v3, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v13, 11, v0 +; UNALIGNED_GFX942-NEXT: scratch_store_byte v11, v3, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_store_byte v13, v3, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v16, 13, v0 +; UNALIGNED_GFX942-NEXT: scratch_store_byte v14, v6, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_store_byte v16, v3, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v6, 15, v0 +; UNALIGNED_GFX942-NEXT: scratch_store_byte v15, v3, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_store_byte v6, v3, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_ubyte v3, v0, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_ubyte v3, v4, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_ubyte v3, v2, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_ubyte v3, v5, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_ubyte v3, v7, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_ubyte v3, v9, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_ubyte v3, v8, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_ubyte v3, v1, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_ubyte v3, v10, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_ubyte v3, v12, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_ubyte v3, v11, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_ubyte v3, v13, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_ubyte v3, v14, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_ubyte v3, v16, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: scratch_load_ubyte v3, v15, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: ; kill: killed $vgpr2 +; UNALIGNED_GFX942-NEXT: ; kill: killed $vgpr1 +; UNALIGNED_GFX942-NEXT: ; kill: killed $vgpr9 +; UNALIGNED_GFX942-NEXT: ; kill: killed $vgpr16 +; UNALIGNED_GFX942-NEXT: ; kill: killed $vgpr11 +; UNALIGNED_GFX942-NEXT: ; kill: killed $vgpr4 +; UNALIGNED_GFX942-NEXT: ; kill: killed $vgpr15 +; UNALIGNED_GFX942-NEXT: ; kill: killed $vgpr10 +; UNALIGNED_GFX942-NEXT: ; kill: killed $vgpr7 +; UNALIGNED_GFX942-NEXT: ; kill: killed $vgpr13 +; UNALIGNED_GFX942-NEXT: ; kill: killed $vgpr5 +; UNALIGNED_GFX942-NEXT: ; kill: killed $vgpr14 +; UNALIGNED_GFX942-NEXT: ; kill: killed $vgpr12 +; UNALIGNED_GFX942-NEXT: ; kill: killed $vgpr8 +; UNALIGNED_GFX942-NEXT: ; kill: killed $vgpr0 +; UNALIGNED_GFX942-NEXT: scratch_load_ubyte v0, v6, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: s_setpc_b64 s[30:31] ; ; UNALIGNED_GFX11-LABEL: store_load_v4i32_unaligned: ; UNALIGNED_GFX11: ; %bb.0: ; %bb @@ -3685,13 +3685,13 @@ define amdgpu_gs void @sgpr_base_large_offset(ptr addrspace(1) %out, ptr addrspa ; GFX10-NEXT: global_store_dword v[0:1], v2, off ; GFX10-NEXT: s_endpgm ; -; GFX940-LABEL: sgpr_base_large_offset: -; GFX940: ; %bb.0: ; %entry -; GFX940-NEXT: s_add_u32 s0, s0, 0xffe8 -; GFX940-NEXT: scratch_load_dword v2, off, s0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dword v[0:1], v2, off sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: sgpr_base_large_offset: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_add_u32 s0, s0, 0xffe8 +; GFX942-NEXT: scratch_load_dword v2, off, s0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: global_store_dword v[0:1], v2, off +; GFX942-NEXT: s_endpgm ; ; GFX11-LABEL: sgpr_base_large_offset: ; GFX11: ; %bb.0: ; %entry @@ -3730,13 +3730,13 @@ define amdgpu_gs void @sgpr_base_large_offset(ptr addrspace(1) %out, ptr addrspa ; UNALIGNED_GFX10-NEXT: global_store_dword v[0:1], v2, off ; UNALIGNED_GFX10-NEXT: s_endpgm ; -; UNALIGNED_GFX940-LABEL: sgpr_base_large_offset: -; UNALIGNED_GFX940: ; %bb.0: ; %entry -; UNALIGNED_GFX940-NEXT: s_add_u32 s0, s0, 0xffe8 -; UNALIGNED_GFX940-NEXT: scratch_load_dword v2, off, s0 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: global_store_dword v[0:1], v2, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_endpgm +; UNALIGNED_GFX942-LABEL: sgpr_base_large_offset: +; UNALIGNED_GFX942: ; %bb.0: ; %entry +; UNALIGNED_GFX942-NEXT: s_add_u32 s0, s0, 0xffe8 +; UNALIGNED_GFX942-NEXT: scratch_load_dword v2, off, s0 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: global_store_dword v[0:1], v2, off +; UNALIGNED_GFX942-NEXT: s_endpgm ; ; UNALIGNED_GFX11-LABEL: sgpr_base_large_offset: ; UNALIGNED_GFX11: ; %bb.0: ; %entry @@ -3784,14 +3784,14 @@ define amdgpu_gs void @sgpr_base_large_offset_split(ptr addrspace(1) %out, ptr a ; GFX10-NEXT: global_store_dword v[0:1], v2, off ; GFX10-NEXT: s_endpgm ; -; GFX940-LABEL: sgpr_base_large_offset_split: -; GFX940: ; %bb.0: ; %entry -; GFX940-NEXT: s_and_b32 s0, s0, -4 -; GFX940-NEXT: s_add_u32 s0, s0, 0x100ffe8 -; GFX940-NEXT: scratch_load_dword v2, off, s0 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dword v[0:1], v2, off sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: sgpr_base_large_offset_split: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_and_b32 s0, s0, -4 +; GFX942-NEXT: s_add_u32 s0, s0, 0x100ffe8 +; GFX942-NEXT: scratch_load_dword v2, off, s0 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: global_store_dword v[0:1], v2, off +; GFX942-NEXT: s_endpgm ; ; GFX11-LABEL: sgpr_base_large_offset_split: ; GFX11: ; %bb.0: ; %entry @@ -3837,14 +3837,14 @@ define amdgpu_gs void @sgpr_base_large_offset_split(ptr addrspace(1) %out, ptr a ; UNALIGNED_GFX10-NEXT: global_store_dword v[0:1], v2, off ; UNALIGNED_GFX10-NEXT: s_endpgm ; -; UNALIGNED_GFX940-LABEL: sgpr_base_large_offset_split: -; UNALIGNED_GFX940: ; %bb.0: ; %entry -; UNALIGNED_GFX940-NEXT: s_and_b32 s0, s0, -4 -; UNALIGNED_GFX940-NEXT: s_add_u32 s0, s0, 0x100ffe8 -; UNALIGNED_GFX940-NEXT: scratch_load_dword v2, off, s0 sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: global_store_dword v[0:1], v2, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_endpgm +; UNALIGNED_GFX942-LABEL: sgpr_base_large_offset_split: +; UNALIGNED_GFX942: ; %bb.0: ; %entry +; UNALIGNED_GFX942-NEXT: s_and_b32 s0, s0, -4 +; UNALIGNED_GFX942-NEXT: s_add_u32 s0, s0, 0x100ffe8 +; UNALIGNED_GFX942-NEXT: scratch_load_dword v2, off, s0 sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: global_store_dword v[0:1], v2, off +; UNALIGNED_GFX942-NEXT: s_endpgm ; ; UNALIGNED_GFX11-LABEL: sgpr_base_large_offset_split: ; UNALIGNED_GFX11: ; %bb.0: ; %entry @@ -3902,15 +3902,15 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset(ptr a ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_endpgm ; -; GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: v_add_u32_e32 v0, s1, v0 -; GFX940-NEXT: v_mov_b32_e32 v1, 0xffe8 -; GFX940-NEXT: v_add3_u32 v0, s0, v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, 15 -; GFX940-NEXT: scratch_store_dword v0, v1, off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: v_add_u32_e32 v0, s1, v0 +; GFX942-NEXT: v_mov_b32_e32 v1, 0xffe8 +; GFX942-NEXT: v_add3_u32 v0, s0, v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, 15 +; GFX942-NEXT: scratch_store_dword v0, v1, off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_endpgm ; ; GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset: ; GFX11: ; %bb.0: ; %bb @@ -3955,15 +3955,15 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset(ptr a ; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; UNALIGNED_GFX10-NEXT: s_endpgm ; -; UNALIGNED_GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset: -; UNALIGNED_GFX940: ; %bb.0: ; %bb -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v0, s1, v0 -; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v1, 0xffe8 -; UNALIGNED_GFX940-NEXT: v_add3_u32 v0, s0, v0, v1 -; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v1, 15 -; UNALIGNED_GFX940-NEXT: scratch_store_dword v0, v1, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: s_endpgm +; UNALIGNED_GFX942-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset: +; UNALIGNED_GFX942: ; %bb.0: ; %bb +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v0, s1, v0 +; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v1, 0xffe8 +; UNALIGNED_GFX942-NEXT: v_add3_u32 v0, s0, v0, v1 +; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v1, 15 +; UNALIGNED_GFX942-NEXT: scratch_store_dword v0, v1, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: s_endpgm ; ; UNALIGNED_GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset: ; UNALIGNED_GFX11: ; %bb.0: ; %bb @@ -4015,14 +4015,14 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(pt ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_endpgm ; -; GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: v_add_u32_e32 v0, s1, v0 -; GFX940-NEXT: v_add3_u32 v0, s0, v0, -16 -; GFX940-NEXT: v_mov_b32_e32 v1, 15 -; GFX940-NEXT: scratch_store_dword v0, v1, off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: v_add_u32_e32 v0, s1, v0 +; GFX942-NEXT: v_add3_u32 v0, s0, v0, -16 +; GFX942-NEXT: v_mov_b32_e32 v1, 15 +; GFX942-NEXT: scratch_store_dword v0, v1, off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_endpgm ; ; GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset: ; GFX11: ; %bb.0: ; %bb @@ -4066,14 +4066,14 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(pt ; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; UNALIGNED_GFX10-NEXT: s_endpgm ; -; UNALIGNED_GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset: -; UNALIGNED_GFX940: ; %bb.0: ; %bb -; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v0, s1, v0 -; UNALIGNED_GFX940-NEXT: v_add3_u32 v0, s0, v0, -16 -; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v1, 15 -; UNALIGNED_GFX940-NEXT: scratch_store_dword v0, v1, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: s_endpgm +; UNALIGNED_GFX942-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset: +; UNALIGNED_GFX942: ; %bb.0: ; %bb +; UNALIGNED_GFX942-NEXT: v_add_u32_e32 v0, s1, v0 +; UNALIGNED_GFX942-NEXT: v_add3_u32 v0, s0, v0, -16 +; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v1, 15 +; UNALIGNED_GFX942-NEXT: scratch_store_dword v0, v1, off sc0 sc1 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: s_endpgm ; ; UNALIGNED_GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset: ; UNALIGNED_GFX11: ; %bb.0: ; %bb @@ -4122,13 +4122,13 @@ define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addr ; GFX10-NEXT: global_store_dword v[0:1], v2, off ; GFX10-NEXT: s_endpgm ; -; GFX940-LABEL: sgpr_base_negative_offset: -; GFX940: ; %bb.0: ; %entry -; GFX940-NEXT: s_add_u32 s0, s0, 0xffffffe8 -; GFX940-NEXT: scratch_load_dword v2, off, s0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dword v[0:1], v2, off sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: sgpr_base_negative_offset: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_add_u32 s0, s0, 0xffffffe8 +; GFX942-NEXT: scratch_load_dword v2, off, s0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: global_store_dword v[0:1], v2, off +; GFX942-NEXT: s_endpgm ; ; GFX11-LABEL: sgpr_base_negative_offset: ; GFX11: ; %bb.0: ; %entry @@ -4165,13 +4165,13 @@ define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addr ; UNALIGNED_GFX10-NEXT: global_store_dword v[0:1], v2, off ; UNALIGNED_GFX10-NEXT: s_endpgm ; -; UNALIGNED_GFX940-LABEL: sgpr_base_negative_offset: -; UNALIGNED_GFX940: ; %bb.0: ; %entry -; UNALIGNED_GFX940-NEXT: s_add_u32 s0, s0, 0xffffffe8 -; UNALIGNED_GFX940-NEXT: scratch_load_dword v2, off, s0 -; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: global_store_dword v[0:1], v2, off sc0 sc1 -; UNALIGNED_GFX940-NEXT: s_endpgm +; UNALIGNED_GFX942-LABEL: sgpr_base_negative_offset: +; UNALIGNED_GFX942: ; %bb.0: ; %entry +; UNALIGNED_GFX942-NEXT: s_add_u32 s0, s0, 0xffffffe8 +; UNALIGNED_GFX942-NEXT: scratch_load_dword v2, off, s0 +; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX942-NEXT: global_store_dword v[0:1], v2, off +; UNALIGNED_GFX942-NEXT: s_endpgm ; ; UNALIGNED_GFX11-LABEL: sgpr_base_negative_offset: ; UNALIGNED_GFX11: ; %bb.0: ; %entry diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx940.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx940.ll deleted file mode 100644 index 676298670f1fa..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx940.ll +++ /dev/null @@ -1,132 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 < %s | FileCheck %s -check-prefix=GFX940 - -define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat(ptr %ptr) { -; GFX940-LABEL: flat_atomic_fadd_f32_noret_pat: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX940-NEXT: v_mov_b32_e32 v2, 4.0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_endpgm - %ret = atomicrmw fadd ptr %ptr, float 4.0 seq_cst, !amdgpu.no.remote.memory !0 - ret void -} - -define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat_ieee(ptr %ptr) #0 { -; GFX940-LABEL: flat_atomic_fadd_f32_noret_pat_ieee: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX940-NEXT: v_mov_b32_e32 v2, 4.0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_endpgm - %ret = atomicrmw fadd ptr %ptr, float 4.0 seq_cst, !amdgpu.no.remote.memory !0 - ret void -} - -define float @flat_atomic_fadd_f32_rtn_pat(ptr %ptr, float %data) { -; GFX940-LABEL: flat_atomic_fadd_f32_rtn_pat: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 4.0 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] - %ret = atomicrmw fadd ptr %ptr, float 4.0 seq_cst, !amdgpu.no.remote.memory !0 - ret float %ret -} - -define <2 x half> @local_atomic_fadd_ret_v2f16_offset(ptr addrspace(3) %ptr, <2 x half> %val) { -; GFX940-LABEL: local_atomic_fadd_ret_v2f16_offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_pk_add_rtn_f16 v0, v0, v1 offset:65532 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr <2 x half>, ptr addrspace(3) %ptr, i32 16383 - %result = atomicrmw fadd ptr addrspace(3) %gep, <2 x half> %val seq_cst - ret <2 x half> %result -} - -define void @local_atomic_fadd_noret_v2f16_offset(ptr addrspace(3) %ptr, <2 x half> %val) { -; GFX940-LABEL: local_atomic_fadd_noret_v2f16_offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_pk_add_f16 v0, v1 offset:65532 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr <2 x half>, ptr addrspace(3) %ptr, i32 16383 - %unused = atomicrmw fadd ptr addrspace(3) %gep, <2 x half> %val seq_cst - ret void -} - -define <2 x half> @global_atomic_fadd_ret_v2f16_agent_offset(ptr addrspace(1) %ptr, <2 x half> %val) { -; GFX940-LABEL: global_atomic_fadd_ret_v2f16_agent_offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_pk_add_f16 v0, v[0:1], v2, off offset:1024 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr <2 x half>, ptr addrspace(1) %ptr, i32 256 - %result = atomicrmw fadd ptr addrspace(1) %gep, <2 x half> %val syncscope("agent") seq_cst - ret <2 x half> %result -} - -define void @global_atomic_fadd_noret_v2f16_agent_offset(ptr addrspace(1) %ptr, <2 x half> %val) { -; GFX940-LABEL: global_atomic_fadd_noret_v2f16_agent_offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off offset:1024 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr <2 x half>, ptr addrspace(1) %ptr, i32 256 - %unused = atomicrmw fadd ptr addrspace(1) %gep, <2 x half> %val syncscope("agent") seq_cst - ret void -} - -define <2 x half> @flat_atomic_fadd_ret_v2f16_agent_offset(ptr %ptr, <2 x half> %val) { -; GFX940-LABEL: flat_atomic_fadd_ret_v2f16_agent_offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:1024 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr <2 x half>, ptr %ptr, i32 256 - %result = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst - ret <2 x half> %result -} - -define void @flat_atomic_fadd_noret_v2f16_agent_offset(ptr %ptr, <2 x half> %val) { -; GFX940-LABEL: flat_atomic_fadd_noret_v2f16_agent_offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 offset:1024 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr <2 x half>, ptr %ptr, i32 256 - %unused = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst - ret void -} - -attributes #0 = { "denormal-fp-math-f32"="ieee,ieee" } - -!0 = !{} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx942.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx942.ll new file mode 100644 index 0000000000000..6792612ded368 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx942.ll @@ -0,0 +1,132 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck %s -check-prefix=GFX942 + +define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat(ptr %ptr) { +; GFX942-LABEL: flat_atomic_fadd_f32_noret_pat: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX942-NEXT: v_mov_b32_e32 v2, 4.0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_endpgm + %ret = atomicrmw fadd ptr %ptr, float 4.0 seq_cst, !amdgpu.no.remote.memory !0 + ret void +} + +define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat_ieee(ptr %ptr) #0 { +; GFX942-LABEL: flat_atomic_fadd_f32_noret_pat_ieee: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX942-NEXT: v_mov_b32_e32 v2, 4.0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_endpgm + %ret = atomicrmw fadd ptr %ptr, float 4.0 seq_cst, !amdgpu.no.remote.memory !0 + ret void +} + +define float @flat_atomic_fadd_f32_rtn_pat(ptr %ptr, float %data) { +; GFX942-LABEL: flat_atomic_fadd_f32_rtn_pat: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 4.0 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] + %ret = atomicrmw fadd ptr %ptr, float 4.0 seq_cst, !amdgpu.no.remote.memory !0 + ret float %ret +} + +define <2 x half> @local_atomic_fadd_ret_v2f16_offset(ptr addrspace(3) %ptr, <2 x half> %val) { +; GFX942-LABEL: local_atomic_fadd_ret_v2f16_offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_pk_add_rtn_f16 v0, v0, v1 offset:65532 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr <2 x half>, ptr addrspace(3) %ptr, i32 16383 + %result = atomicrmw fadd ptr addrspace(3) %gep, <2 x half> %val seq_cst + ret <2 x half> %result +} + +define void @local_atomic_fadd_noret_v2f16_offset(ptr addrspace(3) %ptr, <2 x half> %val) { +; GFX942-LABEL: local_atomic_fadd_noret_v2f16_offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_pk_add_f16 v0, v1 offset:65532 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr <2 x half>, ptr addrspace(3) %ptr, i32 16383 + %unused = atomicrmw fadd ptr addrspace(3) %gep, <2 x half> %val seq_cst + ret void +} + +define <2 x half> @global_atomic_fadd_ret_v2f16_agent_offset(ptr addrspace(1) %ptr, <2 x half> %val) { +; GFX942-LABEL: global_atomic_fadd_ret_v2f16_agent_offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_pk_add_f16 v0, v[0:1], v2, off offset:1024 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr <2 x half>, ptr addrspace(1) %ptr, i32 256 + %result = atomicrmw fadd ptr addrspace(1) %gep, <2 x half> %val syncscope("agent") seq_cst + ret <2 x half> %result +} + +define void @global_atomic_fadd_noret_v2f16_agent_offset(ptr addrspace(1) %ptr, <2 x half> %val) { +; GFX942-LABEL: global_atomic_fadd_noret_v2f16_agent_offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off offset:1024 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr <2 x half>, ptr addrspace(1) %ptr, i32 256 + %unused = atomicrmw fadd ptr addrspace(1) %gep, <2 x half> %val syncscope("agent") seq_cst + ret void +} + +define <2 x half> @flat_atomic_fadd_ret_v2f16_agent_offset(ptr %ptr, <2 x half> %val) { +; GFX942-LABEL: flat_atomic_fadd_ret_v2f16_agent_offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:1024 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr <2 x half>, ptr %ptr, i32 256 + %result = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst + ret <2 x half> %result +} + +define void @flat_atomic_fadd_noret_v2f16_agent_offset(ptr %ptr, <2 x half> %val) { +; GFX942-LABEL: flat_atomic_fadd_noret_v2f16_agent_offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 offset:1024 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] + %gep = getelementptr <2 x half>, ptr %ptr, i32 256 + %unused = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst + ret void +} + +attributes #0 = { "denormal-fp-math-f32"="ieee,ieee" } + +!0 = !{} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll index 8ff2f59964ab5..11c17c21e189d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX90A -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX940 +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX942 declare double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32, i32 immarg) declare double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double, ptr addrspace(8), i32, i32, i32, i32 immarg) @@ -27,16 +27,16 @@ define amdgpu_kernel void @raw_buffer_atomic_add_noret_f64(<4 x i32> %rsrc, doub ; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: raw_buffer_atomic_add_noret_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s8, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s8 -; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: raw_buffer_atomic_add_noret_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s8, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s8 +; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) ret void @@ -50,12 +50,12 @@ define amdgpu_ps void @raw_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, doub ; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: raw_buffer_atomic_add_rtn_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: raw_buffer_atomic_add_rtn_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) store double %ret, ptr undef @@ -78,20 +78,20 @@ define amdgpu_kernel void @raw_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> %rsr ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: raw_buffer_atomic_add_rtn_f64_off4_slc: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s10, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s10 -; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: raw_buffer_atomic_add_rtn_f64_off4_slc: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s10, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s10 +; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -110,16 +110,16 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_add_noret_f64(ptr addrspace(8) ; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: raw_ptr_buffer_atomic_add_noret_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s8, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s8 -; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: raw_ptr_buffer_atomic_add_noret_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s8, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s8 +; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) ret void @@ -133,12 +133,12 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_add_rtn_f64(ptr addrspace(8) inreg ; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: raw_ptr_buffer_atomic_add_rtn_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: raw_ptr_buffer_atomic_add_rtn_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) store double %ret, ptr undef @@ -161,20 +161,20 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_add_rtn_f64_off4_slc(ptr addrsp ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: raw_ptr_buffer_atomic_add_rtn_f64_off4_slc: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s10, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s10 -; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: raw_ptr_buffer_atomic_add_rtn_f64_off4_slc: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s10, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s10 +; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -193,16 +193,16 @@ define amdgpu_kernel void @struct_buffer_atomic_add_noret_f64(<4 x i32> %rsrc, d ; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: struct_buffer_atomic_add_noret_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s8, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s8 -; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: struct_buffer_atomic_add_noret_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s8, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s8 +; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) ret void @@ -216,12 +216,12 @@ define amdgpu_ps void @struct_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, d ; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: struct_buffer_atomic_add_rtn_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: struct_buffer_atomic_add_rtn_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) store double %ret, ptr undef @@ -244,20 +244,20 @@ define amdgpu_kernel void @struct_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> % ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: struct_buffer_atomic_add_rtn_f64_off4_slc: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s10, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s10 -; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: struct_buffer_atomic_add_rtn_f64_off4_slc: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s10, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s10 +; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -276,16 +276,16 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_add_noret_f64(ptr addrspace( ; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: struct_ptr_buffer_atomic_add_noret_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s8, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s8 -; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: struct_ptr_buffer_atomic_add_noret_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s8, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s8 +; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0) ret void @@ -299,12 +299,12 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_add_rtn_f64(ptr addrspace(8) inr ; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: struct_ptr_buffer_atomic_add_rtn_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: struct_ptr_buffer_atomic_add_rtn_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0) store double %ret, ptr undef @@ -327,20 +327,20 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_add_rtn_f64_off4_slc(ptr add ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: struct_ptr_buffer_atomic_add_rtn_f64_off4_slc: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s10, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s10 -; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: struct_ptr_buffer_atomic_add_rtn_f64_off4_slc: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s10, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s10 +; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -359,16 +359,16 @@ define amdgpu_kernel void @raw_buffer_atomic_min_noret_f64(<4 x i32> %rsrc, doub ; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: raw_buffer_atomic_min_noret_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s8, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s8 -; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: raw_buffer_atomic_min_noret_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s8, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s8 +; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) ret void @@ -382,12 +382,12 @@ define amdgpu_ps void @raw_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, doub ; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: raw_buffer_atomic_min_rtn_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: raw_buffer_atomic_min_rtn_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) store double %ret, ptr undef @@ -410,20 +410,20 @@ define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> %rsr ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: raw_buffer_atomic_min_rtn_f64_off4_slc: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s10, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s10 -; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: raw_buffer_atomic_min_rtn_f64_off4_slc: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s10, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s10 +; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -442,16 +442,16 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_min_noret_f64(ptr addrspace(8) ; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: raw_ptr_buffer_atomic_min_noret_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s8, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s8 -; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: raw_ptr_buffer_atomic_min_noret_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s8, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s8 +; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) ret void @@ -465,12 +465,12 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_min_rtn_f64(ptr addrspace(8) inreg ; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: raw_ptr_buffer_atomic_min_rtn_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: raw_ptr_buffer_atomic_min_rtn_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) store double %ret, ptr undef @@ -493,20 +493,20 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_min_rtn_f64_off4_slc(ptr addrsp ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s10, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s10 -; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s10, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s10 +; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -525,16 +525,16 @@ define amdgpu_kernel void @struct_buffer_atomic_min_noret_f64(<4 x i32> %rsrc, d ; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: struct_buffer_atomic_min_noret_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s8, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s8 -; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: struct_buffer_atomic_min_noret_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s8, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s8 +; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) ret void @@ -548,12 +548,12 @@ define amdgpu_ps void @struct_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, d ; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: struct_buffer_atomic_min_rtn_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: struct_buffer_atomic_min_rtn_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) store double %ret, ptr undef @@ -576,20 +576,20 @@ define amdgpu_kernel void @struct_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> % ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: struct_buffer_atomic_min_rtn_f64_off4_slc: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s10, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s10 -; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: struct_buffer_atomic_min_rtn_f64_off4_slc: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s10, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s10 +; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -608,16 +608,16 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_min_noret_f64(ptr addrspace( ; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: struct_ptr_buffer_atomic_min_noret_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s8, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s8 -; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: struct_ptr_buffer_atomic_min_noret_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s8, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s8 +; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0) ret void @@ -631,12 +631,12 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_min_rtn_f64(ptr addrspace(8) inr ; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: struct_ptr_buffer_atomic_min_rtn_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: struct_ptr_buffer_atomic_min_rtn_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0) store double %ret, ptr undef @@ -659,20 +659,20 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_min_rtn_f64_off4_slc(ptr add ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: struct_ptr_buffer_atomic_min_rtn_f64_off4_slc: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s10, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s10 -; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: struct_ptr_buffer_atomic_min_rtn_f64_off4_slc: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s10, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s10 +; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -691,16 +691,16 @@ define amdgpu_kernel void @raw_buffer_atomic_max_noret_f64(<4 x i32> %rsrc, doub ; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: raw_buffer_atomic_max_noret_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s8, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s8 -; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: raw_buffer_atomic_max_noret_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s8, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s8 +; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) ret void @@ -714,12 +714,12 @@ define amdgpu_ps void @raw_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, doub ; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: raw_buffer_atomic_max_rtn_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: raw_buffer_atomic_max_rtn_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) store double %ret, ptr undef @@ -742,20 +742,20 @@ define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> %rsr ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s10, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s10 -; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s10, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s10 +; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -774,16 +774,16 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_max_noret_f64(ptr addrspace(8) ; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: raw_ptr_buffer_atomic_max_noret_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s8, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s8 -; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: raw_ptr_buffer_atomic_max_noret_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s8, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s8 +; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) ret void @@ -797,12 +797,12 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_max_rtn_f64(ptr addrspace(8) inreg ; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: raw_ptr_buffer_atomic_max_rtn_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: raw_ptr_buffer_atomic_max_rtn_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) store double %ret, ptr undef @@ -825,20 +825,20 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_max_rtn_f64_off4_slc(ptr addrsp ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s10, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s10 -; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s10, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s10 +; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -857,16 +857,16 @@ define amdgpu_kernel void @struct_buffer_atomic_max_noret_f64(<4 x i32> %rsrc, d ; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: struct_buffer_atomic_max_noret_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s8, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s8 -; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: struct_buffer_atomic_max_noret_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s8, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s8 +; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) ret void @@ -880,12 +880,12 @@ define amdgpu_ps void @struct_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, d ; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: struct_buffer_atomic_max_rtn_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: struct_buffer_atomic_max_rtn_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) store double %ret, ptr undef @@ -908,20 +908,20 @@ define amdgpu_kernel void @struct_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> % ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: struct_buffer_atomic_max_rtn_f64_off4_slc: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s10, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s10 -; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: struct_buffer_atomic_max_rtn_f64_off4_slc: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s10, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s10 +; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -940,16 +940,16 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_max_noret_f64(ptr addrspace( ; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: struct_ptr_buffer_atomic_max_noret_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s8, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s8 -; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: struct_ptr_buffer_atomic_max_noret_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s8, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s8 +; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0) ret void @@ -963,12 +963,12 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_max_rtn_f64(ptr addrspace(8) inr ; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: struct_ptr_buffer_atomic_max_rtn_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: struct_ptr_buffer_atomic_max_rtn_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0) store double %ret, ptr undef @@ -991,20 +991,20 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_max_rtn_f64_off4_slc(ptr add ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: struct_ptr_buffer_atomic_max_rtn_f64_off4_slc: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s10, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s10 -; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: struct_ptr_buffer_atomic_max_rtn_f64_off4_slc: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s10, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s10 +; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -1035,27 +1035,27 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %pt ; GFX90A-NEXT: .LBB36_2: ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: global_atomic_fadd_f64_noret_pat: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_mov_b64 s[0:1], exec -; GFX940-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 -; GFX940-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX940-NEXT: s_and_saveexec_b64 s[2:3], vcc -; GFX940-NEXT: s_cbranch_execz .LBB36_2 -; GFX940-NEXT: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 -; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1] -; GFX940-NEXT: v_cvt_f64_u32_e32 v[0:1], s0 -; GFX940-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3] sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: .LBB36_2: -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: global_atomic_fadd_f64_noret_pat: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_mov_b64 s[0:1], exec +; GFX942-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 +; GFX942-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc +; GFX942-NEXT: s_cbranch_execz .LBB36_2 +; GFX942-NEXT: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 +; GFX942-NEXT: s_bcnt1_i32_b64 s0, s[0:1] +; GFX942-NEXT: v_cvt_f64_u32_e32 v[0:1], s0 +; GFX942-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3] sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: .LBB36_2: +; GFX942-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0 ret void @@ -1083,27 +1083,27 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace( ; GFX90A-NEXT: .LBB37_2: ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: global_atomic_fadd_f64_noret_pat_agent: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_mov_b64 s[0:1], exec -; GFX940-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 -; GFX940-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX940-NEXT: s_and_saveexec_b64 s[2:3], vcc -; GFX940-NEXT: s_cbranch_execz .LBB37_2 -; GFX940-NEXT: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 -; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1] -; GFX940-NEXT: v_cvt_f64_u32_e32 v[0:1], s0 -; GFX940-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: .LBB37_2: -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: global_atomic_fadd_f64_noret_pat_agent: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_mov_b64 s[0:1], exec +; GFX942-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 +; GFX942-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc +; GFX942-NEXT: s_cbranch_execz .LBB37_2 +; GFX942-NEXT: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 +; GFX942-NEXT: s_bcnt1_i32_b64 s0, s[0:1] +; GFX942-NEXT: v_cvt_f64_u32_e32 v[0:1], s0 +; GFX942-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: .LBB37_2: +; GFX942-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void @@ -1133,27 +1133,27 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace ; GFX90A-NEXT: .LBB38_2: ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: global_atomic_fadd_f64_noret_pat_system: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_mov_b64 s[0:1], exec -; GFX940-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 -; GFX940-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX940-NEXT: s_and_saveexec_b64 s[2:3], vcc -; GFX940-NEXT: s_cbranch_execz .LBB38_2 -; GFX940-NEXT: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 -; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1] -; GFX940-NEXT: v_cvt_f64_u32_e32 v[0:1], s0 -; GFX940-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3] sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: .LBB38_2: -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: global_atomic_fadd_f64_noret_pat_system: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_mov_b64 s[0:1], exec +; GFX942-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 +; GFX942-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc +; GFX942-NEXT: s_cbranch_execz .LBB38_2 +; GFX942-NEXT: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 +; GFX942-NEXT: s_bcnt1_i32_b64 s0, s[0:1] +; GFX942-NEXT: v_cvt_f64_u32_e32 v[0:1], s0 +; GFX942-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3] sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: .LBB38_2: +; GFX942-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void @@ -1181,27 +1181,27 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace( ; GFX90A-NEXT: .LBB39_2: ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: global_atomic_fadd_f64_noret_pat_flush: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_mov_b64 s[0:1], exec -; GFX940-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 -; GFX940-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX940-NEXT: s_and_saveexec_b64 s[2:3], vcc -; GFX940-NEXT: s_cbranch_execz .LBB39_2 -; GFX940-NEXT: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 -; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1] -; GFX940-NEXT: v_cvt_f64_u32_e32 v[0:1], s0 -; GFX940-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: .LBB39_2: -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: global_atomic_fadd_f64_noret_pat_flush: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_mov_b64 s[0:1], exec +; GFX942-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 +; GFX942-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc +; GFX942-NEXT: s_cbranch_execz .LBB39_2 +; GFX942-NEXT: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 +; GFX942-NEXT: s_bcnt1_i32_b64 s0, s[0:1] +; GFX942-NEXT: v_cvt_f64_u32_e32 v[0:1], s0 +; GFX942-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: .LBB39_2: +; GFX942-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void @@ -1220,15 +1220,15 @@ define double @global_atomic_fadd_f64_rtn_pat(ptr addrspace(1) %ptr, double %dat ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_atomic_fadd_f64_rtn_pat: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_atomic_fadd_f64_rtn_pat: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] main_body: %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0 ret double %ret @@ -1245,15 +1245,15 @@ define double @global_atomic_fadd_f64_rtn_pat_agent(ptr addrspace(1) %ptr, doubl ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_atomic_fadd_f64_rtn_pat_agent: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_atomic_fadd_f64_rtn_pat_agent: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] main_body: %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret double %ret @@ -1272,15 +1272,15 @@ define double @global_atomic_fadd_f64_rtn_pat_system(ptr addrspace(1) %ptr, doub ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_atomic_fadd_f64_rtn_pat_system: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_atomic_fadd_f64_rtn_pat_system: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] main_body: %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") seq_cst, !amdgpu.no.fine.grained.memory !0 ret double %ret @@ -1308,27 +1308,27 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(ptr addrs ; GFX90A-NEXT: .LBB43_2: ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: global_atomic_fadd_f64_noret_pat_agent_safe: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_mov_b64 s[0:1], exec -; GFX940-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 -; GFX940-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX940-NEXT: s_and_saveexec_b64 s[2:3], vcc -; GFX940-NEXT: s_cbranch_execz .LBB43_2 -; GFX940-NEXT: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 -; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1] -; GFX940-NEXT: v_cvt_f64_u32_e32 v[0:1], s0 -; GFX940-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: .LBB43_2: -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: global_atomic_fadd_f64_noret_pat_agent_safe: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_mov_b64 s[0:1], exec +; GFX942-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 +; GFX942-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc +; GFX942-NEXT: s_cbranch_execz .LBB43_2 +; GFX942-NEXT: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 +; GFX942-NEXT: s_bcnt1_i32_b64 s0, s[0:1] +; GFX942-NEXT: v_cvt_f64_u32_e32 v[0:1], s0 +; GFX942-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: .LBB43_2: +; GFX942-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void @@ -1349,17 +1349,17 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(ptr %ptr) #1 { ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: flat_atomic_fadd_f64_noret_pat: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr %ptr, double 4.0 seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 ret void @@ -1378,17 +1378,17 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(ptr %ptr) #1 { ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat_agent: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: flat_atomic_fadd_f64_noret_pat_agent: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 ret void @@ -1409,17 +1409,17 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(ptr %ptr) #1 { ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat_system: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: flat_atomic_fadd_f64_noret_pat_system: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("one-as") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 ret void @@ -1438,15 +1438,15 @@ define double @flat_atomic_fadd_f64_rtn_pat(ptr %ptr) #1 { ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_atomic_fadd_f64_rtn_pat: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_atomic_fadd_f64_rtn_pat: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] main_body: %ret = atomicrmw fadd ptr %ptr, double 4.0 seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 ret double %ret @@ -1463,15 +1463,15 @@ define double @flat_atomic_fadd_f64_rtn_pat_agent(ptr %ptr) #1 { ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_atomic_fadd_f64_rtn_pat_agent: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_atomic_fadd_f64_rtn_pat_agent: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] main_body: %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 ret double %ret @@ -1491,16 +1491,16 @@ define double @flat_atomic_fadd_f64_rtn_pat_system(ptr %ptr) #1 { ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_atomic_fadd_f64_rtn_pat_system: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_atomic_fadd_f64_rtn_pat_system: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] main_body: %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("one-as") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 ret double %ret @@ -1519,17 +1519,17 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(ptr %ptr) { ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat_agent_safe: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: flat_atomic_fadd_f64_noret_pat_agent_safe: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 ret void @@ -1556,25 +1556,25 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(ptr addrspace(3) %ptr ; GFX90A-NEXT: .LBB51_2: ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: local_atomic_fadd_f64_noret_pat: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_mov_b64 s[0:1], exec -; GFX940-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 -; GFX940-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX940-NEXT: s_and_saveexec_b64 s[2:3], vcc -; GFX940-NEXT: s_cbranch_execz .LBB51_2 -; GFX940-NEXT: ; %bb.1: -; GFX940-NEXT: s_load_dword s2, s[4:5], 0x24 -; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1] -; GFX940-NEXT: v_cvt_f64_u32_e32 v[0:1], s0 -; GFX940-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-NEXT: ds_add_f64 v2, v[0:1] -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: .LBB51_2: -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: local_atomic_fadd_f64_noret_pat: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_mov_b64 s[0:1], exec +; GFX942-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 +; GFX942-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc +; GFX942-NEXT: s_cbranch_execz .LBB51_2 +; GFX942-NEXT: ; %bb.1: +; GFX942-NEXT: s_load_dword s2, s[4:5], 0x24 +; GFX942-NEXT: s_bcnt1_i32_b64 s0, s[0:1] +; GFX942-NEXT: v_cvt_f64_u32_e32 v[0:1], s0 +; GFX942-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-NEXT: ds_add_f64 v2, v[0:1] +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: .LBB51_2: +; GFX942-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0 ret void @@ -1601,25 +1601,25 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(ptr addrspace(3 ; GFX90A-NEXT: .LBB52_2: ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: local_atomic_fadd_f64_noret_pat_flush: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_mov_b64 s[0:1], exec -; GFX940-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 -; GFX940-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX940-NEXT: s_and_saveexec_b64 s[2:3], vcc -; GFX940-NEXT: s_cbranch_execz .LBB52_2 -; GFX940-NEXT: ; %bb.1: -; GFX940-NEXT: s_load_dword s2, s[4:5], 0x24 -; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1] -; GFX940-NEXT: v_cvt_f64_u32_e32 v[0:1], s0 -; GFX940-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-NEXT: ds_add_f64 v2, v[0:1] -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: .LBB52_2: -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: local_atomic_fadd_f64_noret_pat_flush: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_mov_b64 s[0:1], exec +; GFX942-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 +; GFX942-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc +; GFX942-NEXT: s_cbranch_execz .LBB52_2 +; GFX942-NEXT: ; %bb.1: +; GFX942-NEXT: s_load_dword s2, s[4:5], 0x24 +; GFX942-NEXT: s_bcnt1_i32_b64 s0, s[0:1] +; GFX942-NEXT: v_cvt_f64_u32_e32 v[0:1], s0 +; GFX942-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-NEXT: ds_add_f64 v2, v[0:1] +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: .LBB52_2: +; GFX942-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0 ret void @@ -1646,25 +1646,25 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(ptr addrsp ; GFX90A-NEXT: .LBB53_2: ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: local_atomic_fadd_f64_noret_pat_flush_safe: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_mov_b64 s[0:1], exec -; GFX940-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 -; GFX940-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX940-NEXT: s_and_saveexec_b64 s[2:3], vcc -; GFX940-NEXT: s_cbranch_execz .LBB53_2 -; GFX940-NEXT: ; %bb.1: -; GFX940-NEXT: s_load_dword s2, s[4:5], 0x24 -; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1] -; GFX940-NEXT: v_cvt_f64_u32_e32 v[0:1], s0 -; GFX940-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-NEXT: ds_add_f64 v2, v[0:1] -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: .LBB53_2: -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: local_atomic_fadd_f64_noret_pat_flush_safe: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_mov_b64 s[0:1], exec +; GFX942-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 +; GFX942-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc +; GFX942-NEXT: s_cbranch_execz .LBB53_2 +; GFX942-NEXT: ; %bb.1: +; GFX942-NEXT: s_load_dword s2, s[4:5], 0x24 +; GFX942-NEXT: s_bcnt1_i32_b64 s0, s[0:1] +; GFX942-NEXT: v_cvt_f64_u32_e32 v[0:1], s0 +; GFX942-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-NEXT: ds_add_f64 v2, v[0:1] +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: .LBB53_2: +; GFX942-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0 ret void @@ -1680,13 +1680,13 @@ define double @local_atomic_fadd_f64_rtn_pat(ptr addrspace(3) %ptr, double %data ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fadd_f64_rtn_pat: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 -; GFX940-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3] -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fadd_f64_rtn_pat: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX942-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3] +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] main_body: %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst ret double %ret diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll index 6459110dd8bbb..dbfb4e6ebe7e2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx908 -amdgpu-atomic-optimizer-strategy=DPP -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX908 %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -amdgpu-atomic-optimizer-strategy=DPP -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -amdgpu-atomic-optimizer-strategy=DPP -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX940 %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -amdgpu-atomic-optimizer-strategy=DPP -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX942 %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-atomic-optimizer-strategy=DPP -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-atomic-optimizer-strategy=DPP -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s @@ -28,16 +28,16 @@ define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_atomicrmw(ptr addrspace(1) ; GFX90A-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) ; GFX90A-NEXT: S_ENDPGM 0 ; - ; GFX940-LABEL: name: global_atomic_fadd_f32_no_rtn_atomicrmw - ; GFX940: bb.1 (%ir-block.0): - ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) - ; GFX940-NEXT: S_ENDPGM 0 + ; GFX942-LABEL: name: global_atomic_fadd_f32_no_rtn_atomicrmw + ; GFX942: bb.1 (%ir-block.0): + ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX942-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX942-NEXT: S_ENDPGM 0 ; ; GFX11-LABEL: name: global_atomic_fadd_f32_no_rtn_atomicrmw ; GFX11: bb.1 (%ir-block.0): @@ -198,77 +198,77 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa ; GFX90A-NEXT: SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0 ; - ; GFX940-LABEL: name: global_atomic_fadd_f32_saddr_no_rtn_atomicrmw - ; GFX940: bb.1 (%ir-block.0): - ; GFX940-NEXT: successors: %bb.2(0x40000000), %bb.5(0x40000000) - ; GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE - ; GFX940-NEXT: [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX940-NEXT: S_BRANCH %bb.2 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: bb.2 (%ir-block.5): - ; GFX940-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000) - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $exec - ; GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY3]].sub0 - ; GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY3]].sub0 - ; GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY3]].sub1 - ; GFX940-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; GFX940-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX940-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY7]], [[COPY8]], implicit $exec - ; GFX940-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX940-NEXT: [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 [[COPY9]], [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec - ; GFX940-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX940-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; GFX940-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GFX940-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 0, [[COPY2]], 0, [[COPY10]], [[DEF]], implicit $exec - ; GFX940-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; GFX940-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY11]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec - ; GFX940-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec - ; GFX940-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; GFX940-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY12]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec - ; GFX940-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec - ; GFX940-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; GFX940-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY13]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec - ; GFX940-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec - ; GFX940-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; GFX940-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY14]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec - ; GFX940-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec - ; GFX940-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; GFX940-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY15]], [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec - ; GFX940-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec - ; GFX940-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; GFX940-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY16]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec - ; GFX940-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec - ; GFX940-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63 - ; GFX940-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_2]] - ; GFX940-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[V_READLANE_B32_]] - ; GFX940-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY17]], implicit $exec - ; GFX940-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX940-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY18]], implicit $exec - ; GFX940-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX940-NEXT: S_BRANCH %bb.3 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: bb.3 (%ir-block.31): - ; GFX940-NEXT: successors: %bb.4(0x80000000) - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[STRICT_WWM]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: bb.4.Flow: - ; GFX940-NEXT: successors: %bb.5(0x80000000) - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: SI_END_CF [[SI_IF1]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: bb.5 (%ir-block.33): - ; GFX940-NEXT: SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX940-NEXT: S_ENDPGM 0 + ; GFX942-LABEL: name: global_atomic_fadd_f32_saddr_no_rtn_atomicrmw + ; GFX942: bb.1 (%ir-block.0): + ; GFX942-NEXT: successors: %bb.2(0x40000000), %bb.5(0x40000000) + ; GFX942-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE + ; GFX942-NEXT: [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX942-NEXT: S_BRANCH %bb.2 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: bb.2 (%ir-block.5): + ; GFX942-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000) + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $exec + ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY3]].sub0 + ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY3]].sub0 + ; GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY3]].sub1 + ; GFX942-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GFX942-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GFX942-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY7]], [[COPY8]], implicit $exec + ; GFX942-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX942-NEXT: [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 [[COPY9]], [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec + ; GFX942-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX942-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; GFX942-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GFX942-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 0, [[COPY2]], 0, [[COPY10]], [[DEF]], implicit $exec + ; GFX942-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; GFX942-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY11]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec + ; GFX942-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec + ; GFX942-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; GFX942-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY12]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec + ; GFX942-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec + ; GFX942-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; GFX942-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY13]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec + ; GFX942-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec + ; GFX942-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; GFX942-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY14]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec + ; GFX942-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec + ; GFX942-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; GFX942-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY15]], [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec + ; GFX942-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec + ; GFX942-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; GFX942-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY16]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec + ; GFX942-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec + ; GFX942-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63 + ; GFX942-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_2]] + ; GFX942-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[V_READLANE_B32_]] + ; GFX942-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY17]], implicit $exec + ; GFX942-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GFX942-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY18]], implicit $exec + ; GFX942-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX942-NEXT: S_BRANCH %bb.3 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: bb.3 (%ir-block.31): + ; GFX942-NEXT: successors: %bb.4(0x80000000) + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX942-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[STRICT_WWM]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: bb.4.Flow: + ; GFX942-NEXT: successors: %bb.5(0x80000000) + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: SI_END_CF [[SI_IF1]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: bb.5 (%ir-block.33): + ; GFX942-NEXT: SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX942-NEXT: S_ENDPGM 0 ; ; GFX11-LABEL: name: global_atomic_fadd_f32_saddr_no_rtn_atomicrmw ; GFX11: bb.1 (%ir-block.0): diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll index e935245e30f12..6cd03a2ecaff0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefix=GFX90A %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -stop-after=instruction-select -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefix=GFX940 %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefix=GFX942 %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -stop-after=instruction-select -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefix=GFX11 %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -stop-after=instruction-select -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefix=GFX11 %s @@ -17,17 +17,17 @@ define amdgpu_ps float @global_atomic_fadd_f32_rtn_atomicrmw(ptr addrspace(1) %p ; GFX90A-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] ; GFX90A-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; - ; GFX940-LABEL: name: global_atomic_fadd_f32_rtn_atomicrmw - ; GFX940: bb.1 (%ir-block.0): - ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) - ; GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] - ; GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX942-LABEL: name: global_atomic_fadd_f32_rtn_atomicrmw + ; GFX942: bb.1 (%ir-block.0): + ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX942-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX942-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] + ; GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX11-LABEL: name: global_atomic_fadd_f32_rtn_atomicrmw ; GFX11: bb.1 (%ir-block.0): @@ -136,96 +136,96 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace ; GFX90A-NEXT: $vgpr0 = COPY [[PHI]] ; GFX90A-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; - ; GFX940-LABEL: name: global_atomic_fadd_f32_saddr_rtn_atomicrmw - ; GFX940: bb.1 (%ir-block.0): - ; GFX940-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) - ; GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GFX940-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE - ; GFX940-NEXT: [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX940-NEXT: S_BRANCH %bb.2 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: bb.2 (%ir-block.5): - ; GFX940-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000) - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $exec - ; GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY3]].sub0 - ; GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY3]].sub0 - ; GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY3]].sub1 - ; GFX940-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; GFX940-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX940-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY7]], [[COPY8]], implicit $exec - ; GFX940-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX940-NEXT: [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 [[COPY9]], [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec - ; GFX940-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX940-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; GFX940-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GFX940-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 0, [[COPY2]], 0, [[COPY10]], [[DEF1]], implicit $exec - ; GFX940-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; GFX940-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY11]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec - ; GFX940-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec - ; GFX940-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; GFX940-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY12]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec - ; GFX940-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec - ; GFX940-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; GFX940-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY13]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec - ; GFX940-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec - ; GFX940-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; GFX940-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY14]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec - ; GFX940-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec - ; GFX940-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; GFX940-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY15]], [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec - ; GFX940-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec - ; GFX940-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; GFX940-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY16]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec - ; GFX940-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec - ; GFX940-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] - ; GFX940-NEXT: [[V_MOV_B32_dpp6:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY17]], [[V_ADD_F32_e64_5]], 312, 15, 15, 0, implicit $exec - ; GFX940-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63 - ; GFX940-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_2]] - ; GFX940-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[V_READLANE_B32_]] - ; GFX940-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY18]], implicit $exec - ; GFX940-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX940-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY19]], implicit $exec - ; GFX940-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX940-NEXT: S_BRANCH %bb.3 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: bb.3 (%ir-block.32): - ; GFX940-NEXT: successors: %bb.5(0x80000000) - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[STRICT_WWM]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) - ; GFX940-NEXT: S_BRANCH %bb.5 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: bb.4.Flow: - ; GFX940-NEXT: successors: %bb.6(0x80000000) - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI %42, %bb.5, [[DEF]], %bb.1 - ; GFX940-NEXT: SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX940-NEXT: S_BRANCH %bb.6 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: bb.5 (%ir-block.35): - ; GFX940-NEXT: successors: %bb.4(0x80000000) - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]], %bb.3, [[DEF]], %bb.2 - ; GFX940-NEXT: SI_END_CF [[SI_IF1]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PHI1]], implicit $exec - ; GFX940-NEXT: [[STRICT_WWM1:%[0-9]+]]:vgpr_32 = STRICT_WWM [[V_MOV_B32_dpp6]], implicit $exec - ; GFX940-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX940-NEXT: [[V_ADD_F32_e64_6:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY20]], 0, [[STRICT_WWM1]], 0, 0, implicit $mode, implicit $exec - ; GFX940-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX940-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_ADD_F32_e64_6]], 0, [[COPY21]], [[V_CMP_EQ_U32_e64_]], implicit $exec - ; GFX940-NEXT: S_BRANCH %bb.4 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: bb.6 (%ir-block.41): - ; GFX940-NEXT: $vgpr0 = COPY [[PHI]] - ; GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX942-LABEL: name: global_atomic_fadd_f32_saddr_rtn_atomicrmw + ; GFX942: bb.1 (%ir-block.0): + ; GFX942-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) + ; GFX942-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GFX942-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE + ; GFX942-NEXT: [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX942-NEXT: S_BRANCH %bb.2 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: bb.2 (%ir-block.5): + ; GFX942-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000) + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY $exec + ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY3]].sub0 + ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY3]].sub0 + ; GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY3]].sub1 + ; GFX942-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GFX942-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GFX942-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY7]], [[COPY8]], implicit $exec + ; GFX942-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX942-NEXT: [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 [[COPY9]], [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec + ; GFX942-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GFX942-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; GFX942-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GFX942-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 0, [[COPY2]], 0, [[COPY10]], [[DEF1]], implicit $exec + ; GFX942-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; GFX942-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY11]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec + ; GFX942-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec + ; GFX942-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; GFX942-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY12]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec + ; GFX942-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec + ; GFX942-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; GFX942-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY13]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec + ; GFX942-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec + ; GFX942-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; GFX942-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY14]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec + ; GFX942-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec + ; GFX942-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; GFX942-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY15]], [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec + ; GFX942-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec + ; GFX942-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; GFX942-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY16]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec + ; GFX942-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec + ; GFX942-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; GFX942-NEXT: [[V_MOV_B32_dpp6:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY17]], [[V_ADD_F32_e64_5]], 312, 15, 15, 0, implicit $exec + ; GFX942-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 63 + ; GFX942-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], [[S_MOV_B32_2]] + ; GFX942-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[V_READLANE_B32_]] + ; GFX942-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY18]], implicit $exec + ; GFX942-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GFX942-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[COPY19]], implicit $exec + ; GFX942-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX942-NEXT: S_BRANCH %bb.3 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: bb.3 (%ir-block.32): + ; GFX942-NEXT: successors: %bb.5(0x80000000) + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX942-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[STRICT_WWM]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX942-NEXT: S_BRANCH %bb.5 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: bb.4.Flow: + ; GFX942-NEXT: successors: %bb.6(0x80000000) + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI %42, %bb.5, [[DEF]], %bb.1 + ; GFX942-NEXT: SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX942-NEXT: S_BRANCH %bb.6 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: bb.5 (%ir-block.35): + ; GFX942-NEXT: successors: %bb.4(0x80000000) + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]], %bb.3, [[DEF]], %bb.2 + ; GFX942-NEXT: SI_END_CF [[SI_IF1]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PHI1]], implicit $exec + ; GFX942-NEXT: [[STRICT_WWM1:%[0-9]+]]:vgpr_32 = STRICT_WWM [[V_MOV_B32_dpp6]], implicit $exec + ; GFX942-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX942-NEXT: [[V_ADD_F32_e64_6:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY20]], 0, [[STRICT_WWM1]], 0, 0, implicit $mode, implicit $exec + ; GFX942-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX942-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_ADD_F32_e64_6]], 0, [[COPY21]], [[V_CMP_EQ_U32_e64_]], implicit $exec + ; GFX942-NEXT: S_BRANCH %bb.4 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: bb.6 (%ir-block.41): + ; GFX942-NEXT: $vgpr0 = COPY [[PHI]] + ; GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; ; GFX11-LABEL: name: global_atomic_fadd_f32_saddr_rtn_atomicrmw ; GFX11: bb.1 (%ir-block.0): diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll index 49c5dc7ed5a96..e3bd3d4f22581 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select -amdgpu-atomic-optimizer-strategy=None < %s | FileCheck -check-prefixes=GFX90A_GFX940,GFX90A %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -stop-after=instruction-select -amdgpu-atomic-optimizer-strategy=None < %s | FileCheck -check-prefixes=GFX90A_GFX940,GFX940 %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select -amdgpu-atomic-optimizer-strategy=None < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX90A %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select -amdgpu-atomic-optimizer-strategy=None < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX942 %s define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_atomicrmw(ptr addrspace(1) %ptr, double %data) { ; GFX90A-LABEL: name: global_atomic_fadd_f64_no_rtn_atomicrmw @@ -35,18 +35,18 @@ define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_atomicrmw(ptr addrspace(1) ; GFX90A-NEXT: SI_END_CF [[PHI2]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0 ; - ; GFX940-LABEL: name: global_atomic_fadd_f64_no_rtn_atomicrmw - ; GFX940: bb.1 (%ir-block.0): - ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) - ; GFX940-NEXT: S_ENDPGM 0 + ; GFX942-LABEL: name: global_atomic_fadd_f64_no_rtn_atomicrmw + ; GFX942: bb.1 (%ir-block.0): + ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX942-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX942-NEXT: GLOBAL_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) + ; GFX942-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic ret void } @@ -91,66 +91,66 @@ define amdgpu_ps double @global_atomic_fadd_f64_rtn_atomicrmw(ptr addrspace(1) % ; GFX90A-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 ; - ; GFX940-LABEL: name: global_atomic_fadd_f64_rtn_atomicrmw - ; GFX940: bb.1 (%ir-block.0): - ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) - ; GFX940-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0 - ; GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1 - ; GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + ; GFX942-LABEL: name: global_atomic_fadd_f64_rtn_atomicrmw + ; GFX942: bb.1 (%ir-block.0): + ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX942-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX942-NEXT: [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) + ; GFX942-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0 + ; GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1 + ; GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic ret double %ret } define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, double %data) { - ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_atomicrmw - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_atomicrmw + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX90A_GFX942-NEXT: GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic, !amdgpu.no.fine.grained.memory !0 ret void } define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, double %data) { - ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_saddr_rtn_atomicrmw - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1 - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_f64_saddr_rtn_atomicrmw + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX90A_GFX942-NEXT: [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1 + ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic, !amdgpu.no.fine.grained.memory !0 ret double %ret } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll index 1317770ad834c..9c0db4cd162fc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx908 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX908 %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s define amdgpu_ps void @global_atomic_fadd_v2f16_no_rtn(ptr addrspace(1) %ptr, <2 x half> %data) { ; GFX908-LABEL: name: global_atomic_fadd_v2f16_no_rtn @@ -15,16 +15,16 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_no_rtn(ptr addrspace(1) %ptr, <2 ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr, addrspace 1) ; GFX908-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_no_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_v2f16_no_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr, addrspace 1) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 ret void } @@ -42,17 +42,17 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_saddr_no_rtn(ptr addrspace(1) in ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr, addrspace 1) ; GFX908-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_saddr_no_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_v2f16_saddr_no_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX90A_GFX942-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr, addrspace 1) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll index a65fc6c0c4cfe..62620a8875a3a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll @@ -1,36 +1,36 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_rtn(ptr addrspace(1) %ptr, <2 x half> %data) { - ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_v2f16_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr, addrspace 1) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 ret <2 x half> %ret } define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_saddr_rtn(ptr addrspace(1) inreg %ptr, <2 x half> %data) { - ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_saddr_rtn - ; GFX90A_GFX940: bb.1 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_v2f16_saddr_rtn + ; GFX90A_GFX942: bb.1 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec_xnull = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX90A_GFX942-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst (<2 x s16>) on %ir.ptr, addrspace 1) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 ret <2 x half> %ret } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll index be0c9e2a602fa..4ee658666a1b7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -O0 -stop-after=irtranslator -o - %s | FileCheck %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -O0 -stop-after=irtranslator -o - %s | FileCheck %s define float @test_atomicrmw_fadd(ptr addrspace(3) %addr) { ; CHECK-LABEL: name: test_atomicrmw_fadd diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx940.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx942.mir similarity index 99% rename from llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx940.mir rename to llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx942.mir index 2944bb4ebbd17..e11586e464fb4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx940.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.gfx942.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=FAST -# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=GREEDY +# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=FAST +# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=GREEDY --- name: mfma_i32_16x16x32_i8_vva diff --git a/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir b/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir index e9a8248ef4e90..94fde7c4733a3 100644 --- a/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn -mcpu=gfx908 -run-pass postrapseudos -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX908 %s # RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass postrapseudos -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX90A %s -# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -run-pass postrapseudos -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX940 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -run-pass postrapseudos -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX942 %s --- | define amdgpu_kernel void @a_to_v() #0 { ret void } @@ -64,11 +64,11 @@ body: | ; GFX90A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $vgpr0 ; - ; GFX940-LABEL: name: a_to_v - ; GFX940: liveins: $agpr0 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit $exec - ; GFX940-NEXT: S_ENDPGM 0, implicit $vgpr0 + ; GFX942-LABEL: name: a_to_v + ; GFX942: liveins: $agpr0 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit $exec + ; GFX942-NEXT: S_ENDPGM 0, implicit $vgpr0 $vgpr0 = COPY killed $agpr0, implicit $exec S_ENDPGM 0, implicit $vgpr0 ... @@ -94,12 +94,12 @@ body: | ; GFX90A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit killed $agpr0_agpr1, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1 ; - ; GFX940-LABEL: name: a2_to_v2 - ; GFX940: liveins: $agpr0_agpr1 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $agpr0_agpr1 - ; GFX940-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit killed $agpr0_agpr1, implicit $exec - ; GFX940-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1 + ; GFX942-LABEL: name: a2_to_v2 + ; GFX942: liveins: $agpr0_agpr1 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $agpr0_agpr1 + ; GFX942-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit killed $agpr0_agpr1, implicit $exec + ; GFX942-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1 $vgpr0_vgpr1 = COPY killed $agpr0_agpr1, implicit $exec S_ENDPGM 0, implicit $vgpr0_vgpr1 ... @@ -127,13 +127,13 @@ body: | ; GFX90A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit killed $agpr0_agpr1_agpr2, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2 ; - ; GFX940-LABEL: name: a3_to_v3 - ; GFX940: liveins: $agpr0_agpr1_agpr2 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $agpr0_agpr1_agpr2 - ; GFX940-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2 - ; GFX940-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit killed $agpr0_agpr1_agpr2, implicit $exec - ; GFX940-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2 + ; GFX942-LABEL: name: a3_to_v3 + ; GFX942: liveins: $agpr0_agpr1_agpr2 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $agpr0_agpr1_agpr2 + ; GFX942-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2 + ; GFX942-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit killed $agpr0_agpr1_agpr2, implicit $exec + ; GFX942-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2 $vgpr0_vgpr1_vgpr2 = COPY killed $agpr0_agpr1_agpr2, implicit $exec S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2 ... @@ -162,14 +162,14 @@ body: | ; GFX90A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; - ; GFX940-LABEL: name: a4_to_v4 - ; GFX940: liveins: $agpr0_agpr1_agpr2_agpr3 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX940-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX940-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX940-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3, implicit $exec - ; GFX940-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX942-LABEL: name: a4_to_v4 + ; GFX942: liveins: $agpr0_agpr1_agpr2_agpr3 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX942-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX942-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX942-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3, implicit $exec + ; GFX942-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY killed $agpr0_agpr1_agpr2_agpr3, implicit $exec S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ... @@ -207,18 +207,18 @@ body: | ; GFX90A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; - ; GFX940-LABEL: name: a8_to_v8 - ; GFX940: liveins: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX940-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX940-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX940-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX940-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX940-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX940-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX940-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $exec - ; GFX940-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX942-LABEL: name: a8_to_v8 + ; GFX942: liveins: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GFX942-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GFX942-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GFX942-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GFX942-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GFX942-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GFX942-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GFX942-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $exec + ; GFX942-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $exec S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ... @@ -271,26 +271,26 @@ body: | ; GFX90A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; - ; GFX940-LABEL: name: a16_to_v16 - ; GFX940: liveins: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX940-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX940-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX940-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX940-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX940-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX940-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX940-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX940-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX940-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX940-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX940-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX940-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX940-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX940-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX940-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $exec - ; GFX940-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX942-LABEL: name: a16_to_v16 + ; GFX942: liveins: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX942-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX942-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX942-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX942-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX942-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX942-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX942-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX942-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 $agpr8, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX942-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 $agpr9, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX942-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 $agpr10, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX942-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr11, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX942-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr12, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX942-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr13, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX942-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr14, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX942-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $exec + ; GFX942-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $exec S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ... @@ -313,11 +313,11 @@ body: | ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0 ; - ; GFX940-LABEL: name: v_to_a - ; GFX940: liveins: $vgpr0 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec - ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr0 + ; GFX942-LABEL: name: v_to_a + ; GFX942: liveins: $vgpr0 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec + ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0 $agpr0 = COPY killed $vgpr0, implicit $exec S_ENDPGM 0, implicit $agpr0 ... @@ -342,12 +342,12 @@ body: | ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1 ; - ; GFX940-LABEL: name: v2_to_a2 - ; GFX940: liveins: $vgpr0_vgpr1 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $vgpr0_vgpr1 - ; GFX940-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1, implicit $exec - ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1 + ; GFX942-LABEL: name: v2_to_a2 + ; GFX942: liveins: $vgpr0_vgpr1 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $vgpr0_vgpr1 + ; GFX942-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1, implicit $exec + ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1 $agpr0_agpr1 = COPY killed $vgpr0_vgpr1, implicit $exec S_ENDPGM 0, implicit $agpr0_agpr1 ... @@ -374,13 +374,13 @@ body: | ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2 ; - ; GFX940-LABEL: name: v3_to_a3 - ; GFX940: liveins: $vgpr0_vgpr1_vgpr2 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $vgpr0_vgpr1_vgpr2 - ; GFX940-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 - ; GFX940-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2, implicit $exec - ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2 + ; GFX942-LABEL: name: v3_to_a3 + ; GFX942: liveins: $vgpr0_vgpr1_vgpr2 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $vgpr0_vgpr1_vgpr2 + ; GFX942-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 + ; GFX942-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2, implicit $exec + ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2 $agpr0_agpr1_agpr2 = COPY killed $vgpr0_vgpr1_vgpr2, implicit $exec S_ENDPGM 0, implicit $agpr0_agpr1_agpr2 ... @@ -409,14 +409,14 @@ body: | ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 ; - ; GFX940-LABEL: name: v4_to_a4 - ; GFX940: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX940-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX940-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX940-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec - ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX942-LABEL: name: v4_to_a4 + ; GFX942: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX942-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX942-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX942-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec + ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 $agpr0_agpr1_agpr2_agpr3 = COPY killed $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 ... @@ -453,18 +453,18 @@ body: | ; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; - ; GFX940-LABEL: name: v8_to_a8 - ; GFX940: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX940-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX940-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX940-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX940-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX940-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX940-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX940-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GFX942-LABEL: name: v8_to_a8 + ; GFX942: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX942-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX942-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX942-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX942-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX942-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX942-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX942-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = COPY killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ... @@ -517,26 +517,26 @@ body: | ; GFX90A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; - ; GFX940-LABEL: name: v16_to_a16 - ; GFX940: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; GFX940-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; GFX940-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; GFX940-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; GFX940-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; GFX940-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; GFX940-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; GFX940-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; GFX940-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; GFX940-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; GFX940-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; GFX940-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; GFX940-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; GFX940-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; GFX940-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; GFX940-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $exec - ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX942-LABEL: name: v16_to_a16 + ; GFX942: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX942-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX942-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX942-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX942-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX942-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX942-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX942-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX942-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $vgpr8, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX942-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $vgpr9, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX942-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $vgpr10, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX942-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $vgpr11, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX942-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $vgpr12, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX942-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $vgpr13, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX942-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $vgpr14, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GFX942-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $exec + ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $exec S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ... @@ -560,11 +560,11 @@ body: | ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $sgpr0, implicit $exec, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0 ; - ; GFX940-LABEL: name: s_to_a - ; GFX940: liveins: $sgpr0 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $sgpr0, implicit $exec, implicit $exec - ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr0 + ; GFX942-LABEL: name: s_to_a + ; GFX942: liveins: $sgpr0 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $sgpr0, implicit $exec, implicit $exec + ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0 $agpr0 = COPY killed $sgpr0, implicit $exec S_ENDPGM 0, implicit $agpr0 ... @@ -591,12 +591,12 @@ body: | ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit killed $sgpr0_sgpr1, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1 ; - ; GFX940-LABEL: name: s2_to_a2 - ; GFX940: liveins: $sgpr0_sgpr1 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $sgpr0_sgpr1 - ; GFX940-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit killed $sgpr0_sgpr1, implicit $exec - ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1 + ; GFX942-LABEL: name: s2_to_a2 + ; GFX942: liveins: $sgpr0_sgpr1 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $sgpr0_sgpr1 + ; GFX942-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit killed $sgpr0_sgpr1, implicit $exec + ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1 $agpr0_agpr1 = COPY killed $sgpr0_sgpr1, implicit $exec S_ENDPGM 0, implicit $agpr0_agpr1 ... @@ -626,13 +626,13 @@ body: | ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2 ; - ; GFX940-LABEL: name: s3_to_a3 - ; GFX940: liveins: $sgpr0_sgpr1_sgpr2 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $sgpr0_sgpr1_sgpr2 - ; GFX940-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2 - ; GFX940-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2, implicit $exec - ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2 + ; GFX942-LABEL: name: s3_to_a3 + ; GFX942: liveins: $sgpr0_sgpr1_sgpr2 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $sgpr0_sgpr1_sgpr2 + ; GFX942-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2 + ; GFX942-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2, implicit $exec + ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2 $agpr0_agpr1_agpr2 = COPY killed $sgpr0_sgpr1_sgpr2, implicit $exec S_ENDPGM 0, implicit $agpr0_agpr1_agpr2 ... @@ -665,14 +665,14 @@ body: | ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 ; - ; GFX940-LABEL: name: s4_to_a4 - ; GFX940: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX940-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX940-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX940-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec - ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX942-LABEL: name: s4_to_a4 + ; GFX942: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX942-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX942-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX942-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec + ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 $agpr0_agpr1_agpr2_agpr3 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 ... @@ -711,16 +711,16 @@ body: | ; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $sgpr5, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; - ; GFX940-LABEL: name: s6_to_a6 - ; GFX940: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 - ; GFX940-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 - ; GFX940-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 - ; GFX940-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 - ; GFX940-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr4, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 - ; GFX940-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $sgpr5, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5, implicit $exec - ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; GFX942-LABEL: name: s6_to_a6 + ; GFX942: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 + ; GFX942-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 + ; GFX942-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 + ; GFX942-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 + ; GFX942-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr4, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 + ; GFX942-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $sgpr5, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5, implicit $exec + ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5, implicit $exec S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ... @@ -765,18 +765,18 @@ body: | ; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $sgpr7, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; - ; GFX940-LABEL: name: s8_to_a8 - ; GFX940: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX940-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX940-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX940-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX940-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr4, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX940-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $sgpr5, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX940-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $sgpr6, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX940-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $sgpr7, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $exec - ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GFX942-LABEL: name: s8_to_a8 + ; GFX942: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX942-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX942-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX942-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX942-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr4, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX942-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $sgpr5, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX942-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $sgpr6, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX942-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $sgpr7, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $exec + ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $exec S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ... @@ -845,26 +845,26 @@ body: | ; GFX90A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $sgpr15, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; - ; GFX940-LABEL: name: s16_to_a16 - ; GFX940: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX940-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX940-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX940-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX940-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr4, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX940-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $sgpr5, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX940-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $sgpr6, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX940-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $sgpr7, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX940-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $sgpr8, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX940-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $sgpr9, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX940-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $sgpr10, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX940-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $sgpr11, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX940-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $sgpr12, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX940-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $sgpr13, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX940-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $sgpr14, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - ; GFX940-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $sgpr15, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, implicit $exec - ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX942-LABEL: name: s16_to_a16 + ; GFX942: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX942-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX942-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX942-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX942-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr4, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX942-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $sgpr5, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX942-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $sgpr6, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX942-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $sgpr7, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX942-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $sgpr8, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX942-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $sgpr9, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX942-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $sgpr10, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX942-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $sgpr11, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX942-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $sgpr12, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX942-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $sgpr13, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX942-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $sgpr14, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GFX942-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $sgpr15, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, implicit $exec + ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, implicit $exec S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ... @@ -885,10 +885,10 @@ body: | ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_MOV_B32 killed $agpr1, implicit $exec, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0 ; - ; GFX940-LABEL: name: a_to_a - ; GFX940: $agpr1 = IMPLICIT_DEF - ; GFX940-NEXT: $agpr0 = V_ACCVGPR_MOV_B32 killed $agpr1, implicit $exec, implicit $exec - ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr0 + ; GFX942-LABEL: name: a_to_a + ; GFX942: $agpr1 = IMPLICIT_DEF + ; GFX942-NEXT: $agpr0 = V_ACCVGPR_MOV_B32 killed $agpr1, implicit $exec, implicit $exec + ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0 $agpr1 = IMPLICIT_DEF $agpr0 = COPY killed $agpr1, implicit $exec S_ENDPGM 0, implicit $agpr0 @@ -918,13 +918,13 @@ body: | ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr1, implicit $agpr2, implicit $agpr3 ; - ; GFX940-LABEL: name: a2_to_a2 - ; GFX940: liveins: $agpr0_agpr1 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $agpr2 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit-def $agpr1_agpr2, implicit $agpr0_agpr1 - ; GFX940-NEXT: $agpr1 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit $agpr0_agpr1, implicit $exec - ; GFX940-NEXT: $agpr3 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec - ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr1, implicit $agpr2, implicit $agpr3 + ; GFX942-LABEL: name: a2_to_a2 + ; GFX942: liveins: $agpr0_agpr1 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $agpr2 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit-def $agpr1_agpr2, implicit $agpr0_agpr1 + ; GFX942-NEXT: $agpr1 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit $agpr0_agpr1, implicit $exec + ; GFX942-NEXT: $agpr3 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec + ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr1, implicit $agpr2, implicit $agpr3 $agpr1_agpr2 = COPY $agpr0_agpr1, implicit $exec $agpr3 = COPY $agpr2 S_ENDPGM 0, implicit $agpr1, implicit $agpr2, implicit $agpr3 @@ -954,13 +954,13 @@ body: | ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr1, implicit $agpr2, implicit $agpr3 ; - ; GFX940-LABEL: name: a2_to_a2_kill - ; GFX940: liveins: $agpr0_agpr1 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $agpr2 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit-def $agpr1_agpr2, implicit $agpr0_agpr1 - ; GFX940-NEXT: $agpr1 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit $agpr0_agpr1, implicit $exec - ; GFX940-NEXT: $agpr3 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec - ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr1, implicit $agpr2, implicit $agpr3 + ; GFX942-LABEL: name: a2_to_a2_kill + ; GFX942: liveins: $agpr0_agpr1 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $agpr2 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit-def $agpr1_agpr2, implicit $agpr0_agpr1 + ; GFX942-NEXT: $agpr1 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit $agpr0_agpr1, implicit $exec + ; GFX942-NEXT: $agpr3 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec + ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr1, implicit $agpr2, implicit $agpr3 $agpr1_agpr2 = COPY killed $agpr0_agpr1, implicit $exec $agpr3 = COPY $agpr2 S_ENDPGM 0, implicit $agpr1, implicit $agpr2, implicit $agpr3 @@ -994,15 +994,15 @@ body: | ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit-def $agpr3_agpr4, implicit $agpr1_agpr2 ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit killed $agpr1_agpr2, implicit $exec ; - ; GFX940-LABEL: name: a2_to_a2_implicit_defs - ; GFX940: liveins: $agpr0_agpr1 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1 - ; GFX940-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr1_agpr2 - ; GFX940-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $agpr0_agpr1 - ; GFX940-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec, implicit-def $agpr1_agpr2 - ; GFX940-NEXT: $agpr4 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit-def $agpr3_agpr4, implicit $agpr1_agpr2 - ; GFX940-NEXT: $agpr3 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit killed $agpr1_agpr2, implicit $exec + ; GFX942-LABEL: name: a2_to_a2_implicit_defs + ; GFX942: liveins: $agpr0_agpr1 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1 + ; GFX942-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr1_agpr2 + ; GFX942-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $agpr0_agpr1 + ; GFX942-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec, implicit-def $agpr1_agpr2 + ; GFX942-NEXT: $agpr4 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit-def $agpr3_agpr4, implicit $agpr1_agpr2 + ; GFX942-NEXT: $agpr3 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit killed $agpr1_agpr2, implicit $exec $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1 $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr1_agpr2 $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $agpr0_agpr1 @@ -1035,13 +1035,13 @@ body: | ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_MOV_B32 $agpr6, implicit $exec, implicit killed $agpr4_agpr5_agpr6 ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2 ; - ; GFX940-LABEL: name: a3_to_a3_nonoverlap_kill - ; GFX940: liveins: $agpr4_agpr5_agpr6 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $agpr0 = V_ACCVGPR_MOV_B32 $agpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr4_agpr5_agpr6 - ; GFX940-NEXT: $agpr1 = V_ACCVGPR_MOV_B32 $agpr5, implicit $exec, implicit $agpr4_agpr5_agpr6 - ; GFX940-NEXT: $agpr2 = V_ACCVGPR_MOV_B32 $agpr6, implicit $exec, implicit killed $agpr4_agpr5_agpr6 - ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2 + ; GFX942-LABEL: name: a3_to_a3_nonoverlap_kill + ; GFX942: liveins: $agpr4_agpr5_agpr6 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $agpr0 = V_ACCVGPR_MOV_B32 $agpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr4_agpr5_agpr6 + ; GFX942-NEXT: $agpr1 = V_ACCVGPR_MOV_B32 $agpr5, implicit $exec, implicit $agpr4_agpr5_agpr6 + ; GFX942-NEXT: $agpr2 = V_ACCVGPR_MOV_B32 $agpr6, implicit $exec, implicit killed $agpr4_agpr5_agpr6 + ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2 $agpr0_agpr1_agpr2 = COPY killed $agpr4_agpr5_agpr6 S_ENDPGM 0, implicit $agpr0_agpr1_agpr2 ... @@ -1073,14 +1073,14 @@ body: | ; GFX90A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2, implicit $vgpr1 ; - ; GFX940-LABEL: name: a3_to_a3_overlap_kill - ; GFX940: liveins: $agpr1_agpr2_agpr3 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $agpr0 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr1_agpr2_agpr3 - ; GFX940-NEXT: $agpr1 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit $agpr1_agpr2_agpr3 - ; GFX940-NEXT: $agpr2 = V_ACCVGPR_MOV_B32 $agpr3, implicit $exec, implicit $agpr1_agpr2_agpr3 - ; GFX940-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec - ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2, implicit $vgpr1 + ; GFX942-LABEL: name: a3_to_a3_overlap_kill + ; GFX942: liveins: $agpr1_agpr2_agpr3 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $agpr0 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr1_agpr2_agpr3 + ; GFX942-NEXT: $agpr1 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit $agpr1_agpr2_agpr3 + ; GFX942-NEXT: $agpr2 = V_ACCVGPR_MOV_B32 $agpr3, implicit $exec, implicit $agpr1_agpr2_agpr3 + ; GFX942-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec + ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2, implicit $vgpr1 $agpr0_agpr1_agpr2 = COPY killed $agpr1_agpr2_agpr3 $vgpr1 = COPY $agpr1 S_ENDPGM 0, implicit $agpr0_agpr1_agpr2, implicit $vgpr1 @@ -1111,13 +1111,13 @@ body: | ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr2_agpr3_agpr4_agpr5 ; - ; GFX940-LABEL: name: a4_to_a4 - ; GFX940: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF - ; GFX940-NEXT: $agpr5 = V_ACCVGPR_MOV_B32 $agpr3, implicit $exec, implicit-def $agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX940-NEXT: $agpr4 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX940-NEXT: $agpr3 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX940-NEXT: $agpr2 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3, implicit $exec - ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr2_agpr3_agpr4_agpr5 + ; GFX942-LABEL: name: a4_to_a4 + ; GFX942: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF + ; GFX942-NEXT: $agpr5 = V_ACCVGPR_MOV_B32 $agpr3, implicit $exec, implicit-def $agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX942-NEXT: $agpr4 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX942-NEXT: $agpr3 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX942-NEXT: $agpr2 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3, implicit $exec + ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr2_agpr3_agpr4_agpr5 $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF $agpr2_agpr3_agpr4_agpr5 = COPY killed $agpr0_agpr1_agpr2_agpr3, implicit $exec S_ENDPGM 0, implicit $agpr2_agpr3_agpr4_agpr5 @@ -1151,14 +1151,14 @@ body: | ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0, implicit $agpr1, implicit $agpr2, implicit $agpr3, implicit $agpr4, implicit $agpr5 ; - ; GFX940-LABEL: name: a4_to_a4_overlap - ; GFX940: liveins: $agpr0_agpr1_agpr2_agpr3 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $agpr5 = V_ACCVGPR_MOV_B32 $agpr3, implicit $exec, implicit-def $agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX940-NEXT: $agpr4 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX940-NEXT: $agpr3 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX940-NEXT: $agpr2 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3, implicit $exec - ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr0, implicit $agpr1, implicit $agpr2, implicit $agpr3, implicit $agpr4, implicit $agpr5 + ; GFX942-LABEL: name: a4_to_a4_overlap + ; GFX942: liveins: $agpr0_agpr1_agpr2_agpr3 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $agpr5 = V_ACCVGPR_MOV_B32 $agpr3, implicit $exec, implicit-def $agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX942-NEXT: $agpr4 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX942-NEXT: $agpr3 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX942-NEXT: $agpr2 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3, implicit $exec + ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0, implicit $agpr1, implicit $agpr2, implicit $agpr3, implicit $agpr4, implicit $agpr5 $agpr2_agpr3_agpr4_agpr5 = COPY $agpr0_agpr1_agpr2_agpr3, implicit $exec S_ENDPGM 0, implicit $agpr0, implicit $agpr1, implicit $agpr2, implicit $agpr3, implicit $agpr4, implicit $agpr5 ... @@ -1200,17 +1200,17 @@ body: | ; GFX90A-NEXT: $agpr8 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; - ; GFX940-LABEL: name: a8_to_a8 - ; GFX940: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF - ; GFX940-NEXT: $agpr15 = V_ACCVGPR_MOV_B32 $agpr7, implicit $exec, implicit-def $agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX940-NEXT: $agpr14 = V_ACCVGPR_MOV_B32 $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX940-NEXT: $agpr13 = V_ACCVGPR_MOV_B32 $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX940-NEXT: $agpr12 = V_ACCVGPR_MOV_B32 $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX940-NEXT: $agpr11 = V_ACCVGPR_MOV_B32 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX940-NEXT: $agpr10 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX940-NEXT: $agpr9 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX940-NEXT: $agpr8 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $exec - ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX942-LABEL: name: a8_to_a8 + ; GFX942: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF + ; GFX942-NEXT: $agpr15 = V_ACCVGPR_MOV_B32 $agpr7, implicit $exec, implicit-def $agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GFX942-NEXT: $agpr14 = V_ACCVGPR_MOV_B32 $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GFX942-NEXT: $agpr13 = V_ACCVGPR_MOV_B32 $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GFX942-NEXT: $agpr12 = V_ACCVGPR_MOV_B32 $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GFX942-NEXT: $agpr11 = V_ACCVGPR_MOV_B32 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GFX942-NEXT: $agpr10 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GFX942-NEXT: $agpr9 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; GFX942-NEXT: $agpr8 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $exec + ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF $agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = COPY killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $exec S_ENDPGM 0, implicit $agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 @@ -1278,25 +1278,25 @@ body: | ; GFX90A-NEXT: $agpr16 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; - ; GFX940-LABEL: name: a16_to_a16 - ; GFX940: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF - ; GFX940-NEXT: $agpr31 = V_ACCVGPR_MOV_B32 $agpr15, implicit $exec, implicit-def $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX940-NEXT: $agpr30 = V_ACCVGPR_MOV_B32 $agpr14, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX940-NEXT: $agpr29 = V_ACCVGPR_MOV_B32 $agpr13, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX940-NEXT: $agpr28 = V_ACCVGPR_MOV_B32 $agpr12, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX940-NEXT: $agpr27 = V_ACCVGPR_MOV_B32 $agpr11, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX940-NEXT: $agpr26 = V_ACCVGPR_MOV_B32 $agpr10, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX940-NEXT: $agpr25 = V_ACCVGPR_MOV_B32 $agpr9, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX940-NEXT: $agpr24 = V_ACCVGPR_MOV_B32 $agpr8, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX940-NEXT: $agpr23 = V_ACCVGPR_MOV_B32 $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX940-NEXT: $agpr22 = V_ACCVGPR_MOV_B32 $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX940-NEXT: $agpr21 = V_ACCVGPR_MOV_B32 $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX940-NEXT: $agpr20 = V_ACCVGPR_MOV_B32 $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX940-NEXT: $agpr19 = V_ACCVGPR_MOV_B32 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX940-NEXT: $agpr18 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX940-NEXT: $agpr17 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX940-NEXT: $agpr16 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $exec - ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; GFX942-LABEL: name: a16_to_a16 + ; GFX942: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF + ; GFX942-NEXT: $agpr31 = V_ACCVGPR_MOV_B32 $agpr15, implicit $exec, implicit-def $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX942-NEXT: $agpr30 = V_ACCVGPR_MOV_B32 $agpr14, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX942-NEXT: $agpr29 = V_ACCVGPR_MOV_B32 $agpr13, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX942-NEXT: $agpr28 = V_ACCVGPR_MOV_B32 $agpr12, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX942-NEXT: $agpr27 = V_ACCVGPR_MOV_B32 $agpr11, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX942-NEXT: $agpr26 = V_ACCVGPR_MOV_B32 $agpr10, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX942-NEXT: $agpr25 = V_ACCVGPR_MOV_B32 $agpr9, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX942-NEXT: $agpr24 = V_ACCVGPR_MOV_B32 $agpr8, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX942-NEXT: $agpr23 = V_ACCVGPR_MOV_B32 $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX942-NEXT: $agpr22 = V_ACCVGPR_MOV_B32 $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX942-NEXT: $agpr21 = V_ACCVGPR_MOV_B32 $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX942-NEXT: $agpr20 = V_ACCVGPR_MOV_B32 $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX942-NEXT: $agpr19 = V_ACCVGPR_MOV_B32 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX942-NEXT: $agpr18 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX942-NEXT: $agpr17 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; GFX942-NEXT: $agpr16 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $exec + ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = COPY killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $exec S_ENDPGM 0, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 @@ -1326,12 +1326,12 @@ body: | ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_MOV_B32 killed $agpr1, implicit $exec, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0 ; - ; GFX940-LABEL: name: a_to_a_spill - ; GFX940: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $agpr1 = IMPLICIT_DEF - ; GFX940-NEXT: $agpr0 = V_ACCVGPR_MOV_B32 killed $agpr1, implicit $exec, implicit $exec - ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr0 + ; GFX942-LABEL: name: a_to_a_spill + ; GFX942: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $agpr1 = IMPLICIT_DEF + ; GFX942-NEXT: $agpr0 = V_ACCVGPR_MOV_B32 killed $agpr1, implicit $exec, implicit $exec + ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0 $agpr1 = IMPLICIT_DEF $agpr0 = COPY killed $agpr1, implicit $exec S_ENDPGM 0, implicit $agpr0 @@ -1368,15 +1368,15 @@ body: | ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; - ; GFX940-LABEL: name: copy_sgpr_to_agpr_tuple - ; GFX940: liveins: $agpr0, $sgpr2_sgpr3 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: S_NOP 0, implicit-def dead $sgpr0_sgpr1 - ; GFX940-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX940-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX940-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX940-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec - ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX942-LABEL: name: copy_sgpr_to_agpr_tuple + ; GFX942: liveins: $agpr0, $sgpr2_sgpr3 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: S_NOP 0, implicit-def dead $sgpr0_sgpr1 + ; GFX942-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX942-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX942-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX942-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec + ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3 S_NOP 0, implicit-def dead $sgpr0_sgpr1 renamable $agpr4_agpr5_agpr6_agpr7 = COPY renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3 @@ -1412,15 +1412,15 @@ body: | ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7 ; - ; GFX940-LABEL: name: copy_sgpr_to_agpr_tuple_kill - ; GFX940: liveins: $agpr0, $sgpr2_sgpr3 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: S_NOP 0, implicit-def dead $sgpr0_sgpr1 - ; GFX940-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX940-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX940-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX940-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec - ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7 + ; GFX942-LABEL: name: copy_sgpr_to_agpr_tuple_kill + ; GFX942: liveins: $agpr0, $sgpr2_sgpr3 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: S_NOP 0, implicit-def dead $sgpr0_sgpr1 + ; GFX942-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX942-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX942-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX942-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec + ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7 S_NOP 0, implicit-def dead $sgpr0_sgpr1 renamable $agpr4_agpr5_agpr6_agpr7 = COPY renamable killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7 @@ -1457,15 +1457,15 @@ body: | ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3 ; - ; GFX940-LABEL: name: copy_agpr_to_agpr_tuple - ; GFX940: liveins: $agpr0, $agpr2_agpr3 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: S_NOP 0, implicit-def dead $agpr0_agpr1 - ; GFX940-NEXT: $agpr7 = V_ACCVGPR_MOV_B32 $agpr3, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX940-NEXT: $agpr6 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX940-NEXT: $agpr5 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX940-NEXT: $agpr4 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3, implicit $exec - ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX942-LABEL: name: copy_agpr_to_agpr_tuple + ; GFX942: liveins: $agpr0, $agpr2_agpr3 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: S_NOP 0, implicit-def dead $agpr0_agpr1 + ; GFX942-NEXT: $agpr7 = V_ACCVGPR_MOV_B32 $agpr3, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX942-NEXT: $agpr6 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX942-NEXT: $agpr5 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX942-NEXT: $agpr4 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3, implicit $exec + ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3 S_NOP 0, implicit-def dead $agpr0_agpr1 renamable $agpr4_agpr5_agpr6_agpr7 = COPY renamable $agpr0_agpr1_agpr2_agpr3, implicit $exec S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3 @@ -1502,15 +1502,15 @@ body: | ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3, implicit $exec ; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7 ; - ; GFX940-LABEL: name: copy_agpr_to_agpr_tuple_kill - ; GFX940: liveins: $agpr0, $agpr2_agpr3 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: S_NOP 0, implicit-def dead $agpr0_agpr1 - ; GFX940-NEXT: $agpr7 = V_ACCVGPR_MOV_B32 $agpr3, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX940-NEXT: $agpr6 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX940-NEXT: $agpr5 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX940-NEXT: $agpr4 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3, implicit $exec - ; GFX940-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7 + ; GFX942-LABEL: name: copy_agpr_to_agpr_tuple_kill + ; GFX942: liveins: $agpr0, $agpr2_agpr3 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: S_NOP 0, implicit-def dead $agpr0_agpr1 + ; GFX942-NEXT: $agpr7 = V_ACCVGPR_MOV_B32 $agpr3, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX942-NEXT: $agpr6 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX942-NEXT: $agpr5 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX942-NEXT: $agpr4 = V_ACCVGPR_MOV_B32 $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3, implicit $exec + ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7 S_NOP 0, implicit-def dead $agpr0_agpr1 renamable $agpr4_agpr5_agpr6_agpr7 = COPY renamable killed $agpr0_agpr1_agpr2_agpr3, implicit $exec S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7 diff --git a/llvm/test/CodeGen/AMDGPU/amdhsa-kernarg-preload-num-sgprs.ll b/llvm/test/CodeGen/AMDGPU/amdhsa-kernarg-preload-num-sgprs.ll index 835e5e5f06ef0..0114de738ce84 100644 --- a/llvm/test/CodeGen/AMDGPU/amdhsa-kernarg-preload-num-sgprs.ll +++ b/llvm/test/CodeGen/AMDGPU/amdhsa-kernarg-preload-num-sgprs.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=obj < %s | llvm-objdump -s -j .rodata - | FileCheck --check-prefix=OBJDUMP %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck --check-prefix=ASM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -filetype=obj < %s | llvm-objdump -s -j .rodata - | FileCheck --check-prefix=OBJDUMP %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck --check-prefix=ASM %s ; OBJDUMP: Contents of section .rodata: ; OBJDUMP-NEXT: 0000 00000000 00000000 10010000 00000000 ................ diff --git a/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll b/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll index f5c9b1a79b476..5f56568ef88e4 100644 --- a/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll +++ b/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX908 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefix=GFX90A %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX942 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX1100 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX1200 %s @@ -80,14 +80,14 @@ define float @syncscope_system(ptr %addr, float %val) #0 { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: syncscope_system: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: syncscope_system: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-LABEL: syncscope_system: ; GFX1100: ; %bb.0: @@ -187,12 +187,12 @@ define float @syncscope_workgroup_rtn(ptr %addr, float %val) #0 { ; GFX90A-NEXT: v_mov_b32_e32 v0, v3 ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: syncscope_workgroup_rtn: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: syncscope_workgroup_rtn: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-LABEL: syncscope_workgroup_rtn: ; GFX1100: ; %bb.0: @@ -320,12 +320,12 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) #0 { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: syncscope_workgroup_nortn: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: syncscope_workgroup_nortn: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-LABEL: syncscope_workgroup_nortn: ; GFX1100: ; %bb.0: @@ -396,12 +396,12 @@ define float @no_unsafe(ptr %addr, float %val) { ; GFX90A-NEXT: v_mov_b32_e32 v0, v3 ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: no_unsafe: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: no_unsafe: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-LABEL: no_unsafe: ; GFX1100: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll b/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll index 632f7dbc53373..7b255a76528ce 100644 --- a/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll +++ b/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9-NO-BACKOFF %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9-BACKOFF %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX940-BACKOFF %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX942-BACKOFF %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=-back-off-barrier -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9-NO-BACKOFF %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-BACKOFF %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11-BACKOFF %s @@ -30,15 +30,15 @@ define void @back_off_barrier_no_fence(ptr %in, ptr %out) #0 { ; GFX9-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-BACKOFF-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-BACKOFF-LABEL: back_off_barrier_no_fence: -; GFX940-BACKOFF: ; %bb.0: -; GFX940-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-BACKOFF-NEXT: flat_load_dword v0, v[0:1] -; GFX940-BACKOFF-NEXT: s_barrier -; GFX940-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-BACKOFF-NEXT: flat_store_dword v[2:3], v0 sc0 sc1 -; GFX940-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-BACKOFF-NEXT: s_setpc_b64 s[30:31] +; GFX942-BACKOFF-LABEL: back_off_barrier_no_fence: +; GFX942-BACKOFF: ; %bb.0: +; GFX942-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-BACKOFF-NEXT: flat_load_dword v0, v[0:1] +; GFX942-BACKOFF-NEXT: s_barrier +; GFX942-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-BACKOFF-NEXT: flat_store_dword v[2:3], v0 +; GFX942-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-BACKOFF-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-BACKOFF-LABEL: back_off_barrier_no_fence: ; GFX10-BACKOFF: ; %bb.0: @@ -88,16 +88,16 @@ define void @back_off_barrier_with_fence(ptr %in, ptr %out) #0 { ; GFX9-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-BACKOFF-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-BACKOFF-LABEL: back_off_barrier_with_fence: -; GFX940-BACKOFF: ; %bb.0: -; GFX940-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-BACKOFF-NEXT: flat_load_dword v0, v[0:1] -; GFX940-BACKOFF-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-BACKOFF-NEXT: s_barrier -; GFX940-BACKOFF-NEXT: s_waitcnt vmcnt(0) -; GFX940-BACKOFF-NEXT: flat_store_dword v[2:3], v0 sc0 sc1 -; GFX940-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-BACKOFF-NEXT: s_setpc_b64 s[30:31] +; GFX942-BACKOFF-LABEL: back_off_barrier_with_fence: +; GFX942-BACKOFF: ; %bb.0: +; GFX942-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-BACKOFF-NEXT: flat_load_dword v0, v[0:1] +; GFX942-BACKOFF-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-BACKOFF-NEXT: s_barrier +; GFX942-BACKOFF-NEXT: s_waitcnt vmcnt(0) +; GFX942-BACKOFF-NEXT: flat_store_dword v[2:3], v0 +; GFX942-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-BACKOFF-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-BACKOFF-LABEL: back_off_barrier_with_fence: ; GFX10-BACKOFF: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/bf16-conversions.ll b/llvm/test/CodeGen/AMDGPU/bf16-conversions.ll index 59e47cbc12b29..0b5d47df2cc35 100644 --- a/llvm/test/CodeGen/AMDGPU/bf16-conversions.ll +++ b/llvm/test/CodeGen/AMDGPU/bf16-conversions.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 < %s | FileCheck --check-prefixes=GCN,GFX-940 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck --check-prefixes=GCN,GFX-942 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck --check-prefixes=GCN,GFX-950 %s ; TODO: Add global-isel when it can support bf16 @@ -24,24 +24,24 @@ define amdgpu_ps float @v_test_cvt_bf16_f32_s(bfloat inreg %v) { } define amdgpu_ps float @v_test_cvt_v2f32_v2bf16_v(<2 x float> %src) { -; GFX-940-LABEL: v_test_cvt_v2f32_v2bf16_v: -; GFX-940: ; %bb.0: -; GFX-940-NEXT: v_bfe_u32 v2, v0, 16, 1 -; GFX-940-NEXT: s_movk_i32 s0, 0x7fff -; GFX-940-NEXT: v_add3_u32 v2, v2, v0, s0 -; GFX-940-NEXT: v_or_b32_e32 v3, 0x400000, v0 -; GFX-940-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX-940-NEXT: s_nop 1 -; GFX-940-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc -; GFX-940-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX-940-NEXT: v_add3_u32 v2, v2, v1, s0 -; GFX-940-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX-940-NEXT: v_cmp_u_f32_e32 vcc, v1, v1 -; GFX-940-NEXT: s_mov_b32 s0, 0x7060302 -; GFX-940-NEXT: s_nop 0 -; GFX-940-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc -; GFX-940-NEXT: v_perm_b32 v0, v1, v0, s0 -; GFX-940-NEXT: ; return to shader part epilog +; GFX-942-LABEL: v_test_cvt_v2f32_v2bf16_v: +; GFX-942: ; %bb.0: +; GFX-942-NEXT: v_bfe_u32 v2, v0, 16, 1 +; GFX-942-NEXT: s_movk_i32 s0, 0x7fff +; GFX-942-NEXT: v_add3_u32 v2, v2, v0, s0 +; GFX-942-NEXT: v_or_b32_e32 v3, 0x400000, v0 +; GFX-942-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX-942-NEXT: s_nop 1 +; GFX-942-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc +; GFX-942-NEXT: v_bfe_u32 v2, v1, 16, 1 +; GFX-942-NEXT: v_add3_u32 v2, v2, v1, s0 +; GFX-942-NEXT: v_or_b32_e32 v3, 0x400000, v1 +; GFX-942-NEXT: v_cmp_u_f32_e32 vcc, v1, v1 +; GFX-942-NEXT: s_mov_b32 s0, 0x7060302 +; GFX-942-NEXT: s_nop 0 +; GFX-942-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc +; GFX-942-NEXT: v_perm_b32 v0, v1, v0, s0 +; GFX-942-NEXT: ; return to shader part epilog ; ; GFX-950-LABEL: v_test_cvt_v2f32_v2bf16_v: ; GFX-950: ; %bb.0: @@ -53,27 +53,27 @@ define amdgpu_ps float @v_test_cvt_v2f32_v2bf16_v(<2 x float> %src) { } define amdgpu_ps float @v_test_cvt_v2f32_v2bf16_s(<2 x float> inreg %src) { -; GFX-940-LABEL: v_test_cvt_v2f32_v2bf16_s: -; GFX-940: ; %bb.0: -; GFX-940-NEXT: s_bfe_u32 s2, s1, 0x10010 -; GFX-940-NEXT: s_add_i32 s2, s2, s1 -; GFX-940-NEXT: s_or_b32 s4, s1, 0x400000 -; GFX-940-NEXT: s_add_i32 s5, s2, 0x7fff -; GFX-940-NEXT: v_cmp_u_f32_e64 s[2:3], s1, s1 -; GFX-940-NEXT: s_and_b64 s[2:3], s[2:3], exec -; GFX-940-NEXT: s_cselect_b32 s1, s4, s5 -; GFX-940-NEXT: s_lshr_b32 s2, s1, 16 -; GFX-940-NEXT: s_bfe_u32 s1, s0, 0x10010 -; GFX-940-NEXT: s_add_i32 s1, s1, s0 -; GFX-940-NEXT: s_or_b32 s3, s0, 0x400000 -; GFX-940-NEXT: s_add_i32 s4, s1, 0x7fff -; GFX-940-NEXT: v_cmp_u_f32_e64 s[0:1], s0, s0 -; GFX-940-NEXT: s_and_b64 s[0:1], s[0:1], exec -; GFX-940-NEXT: s_cselect_b32 s0, s3, s4 -; GFX-940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX-940-NEXT: s_pack_ll_b32_b16 s0, s0, s2 -; GFX-940-NEXT: v_mov_b32_e32 v0, s0 -; GFX-940-NEXT: ; return to shader part epilog +; GFX-942-LABEL: v_test_cvt_v2f32_v2bf16_s: +; GFX-942: ; %bb.0: +; GFX-942-NEXT: s_bfe_u32 s2, s1, 0x10010 +; GFX-942-NEXT: s_add_i32 s2, s2, s1 +; GFX-942-NEXT: s_or_b32 s4, s1, 0x400000 +; GFX-942-NEXT: s_add_i32 s5, s2, 0x7fff +; GFX-942-NEXT: v_cmp_u_f32_e64 s[2:3], s1, s1 +; GFX-942-NEXT: s_and_b64 s[2:3], s[2:3], exec +; GFX-942-NEXT: s_cselect_b32 s1, s4, s5 +; GFX-942-NEXT: s_lshr_b32 s2, s1, 16 +; GFX-942-NEXT: s_bfe_u32 s1, s0, 0x10010 +; GFX-942-NEXT: s_add_i32 s1, s1, s0 +; GFX-942-NEXT: s_or_b32 s3, s0, 0x400000 +; GFX-942-NEXT: s_add_i32 s4, s1, 0x7fff +; GFX-942-NEXT: v_cmp_u_f32_e64 s[0:1], s0, s0 +; GFX-942-NEXT: s_and_b64 s[0:1], s[0:1], exec +; GFX-942-NEXT: s_cselect_b32 s0, s3, s4 +; GFX-942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX-942-NEXT: s_pack_ll_b32_b16 s0, s0, s2 +; GFX-942-NEXT: v_mov_b32_e32 v0, s0 +; GFX-942-NEXT: ; return to shader part epilog ; ; GFX-950-LABEL: v_test_cvt_v2f32_v2bf16_s: ; GFX-950: ; %bb.0: @@ -86,17 +86,17 @@ define amdgpu_ps float @v_test_cvt_v2f32_v2bf16_s(<2 x float> inreg %src) { } define amdgpu_ps float @v_test_cvt_f32_bf16_v(float %src) { -; GFX-940-LABEL: v_test_cvt_f32_bf16_v: -; GFX-940: ; %bb.0: -; GFX-940-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX-940-NEXT: s_movk_i32 s0, 0x7fff -; GFX-940-NEXT: v_add3_u32 v1, v1, v0, s0 -; GFX-940-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX-940-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX-940-NEXT: s_nop 1 -; GFX-940-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc -; GFX-940-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GFX-940-NEXT: ; return to shader part epilog +; GFX-942-LABEL: v_test_cvt_f32_bf16_v: +; GFX-942: ; %bb.0: +; GFX-942-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX-942-NEXT: s_movk_i32 s0, 0x7fff +; GFX-942-NEXT: v_add3_u32 v1, v1, v0, s0 +; GFX-942-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX-942-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX-942-NEXT: s_nop 1 +; GFX-942-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; GFX-942-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX-942-NEXT: ; return to shader part epilog ; ; GFX-950-LABEL: v_test_cvt_f32_bf16_v: ; GFX-950: ; %bb.0: @@ -109,47 +109,47 @@ define amdgpu_ps float @v_test_cvt_f32_bf16_v(float %src) { } define amdgpu_ps float @v_test_cvt_v2f64_v2bf16_v(<2 x double> %src) { -; GFX-940-LABEL: v_test_cvt_v2f64_v2bf16_v: -; GFX-940: ; %bb.0: -; GFX-940-NEXT: v_cvt_f32_f64_e64 v6, |v[0:1]| -; GFX-940-NEXT: v_cvt_f64_f32_e32 v[4:5], v6 -; GFX-940-NEXT: v_and_b32_e32 v7, 1, v6 -; GFX-940-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5] -; GFX-940-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5] -; GFX-940-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7 -; GFX-940-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3] -; GFX-940-NEXT: v_add_u32_e32 v4, v6, v4 -; GFX-940-NEXT: s_or_b64 vcc, s[0:1], vcc -; GFX-940-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX-940-NEXT: s_brev_b32 s4, 1 -; GFX-940-NEXT: v_and_or_b32 v5, v1, s4, v4 -; GFX-940-NEXT: v_bfe_u32 v4, v4, 16, 1 -; GFX-940-NEXT: s_movk_i32 s5, 0x7fff -; GFX-940-NEXT: v_add3_u32 v4, v4, v5, s5 -; GFX-940-NEXT: v_or_b32_e32 v5, 0x400000, v5 -; GFX-940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1] -; GFX-940-NEXT: s_nop 1 -; GFX-940-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc -; GFX-940-NEXT: v_cvt_f32_f64_e64 v5, |v[2:3]| -; GFX-940-NEXT: v_cvt_f64_f32_e32 v[0:1], v5 -; GFX-940-NEXT: v_and_b32_e32 v6, 1, v5 -; GFX-940-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[2:3]|, v[0:1] -; GFX-940-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[2:3]|, v[0:1] -; GFX-940-NEXT: v_cmp_eq_u32_e32 vcc, 1, v6 -; GFX-940-NEXT: v_cndmask_b32_e64 v0, -1, 1, s[2:3] -; GFX-940-NEXT: v_add_u32_e32 v0, v5, v0 -; GFX-940-NEXT: s_or_b64 vcc, s[0:1], vcc -; GFX-940-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc -; GFX-940-NEXT: v_and_or_b32 v1, v3, s4, v0 -; GFX-940-NEXT: v_bfe_u32 v0, v0, 16, 1 -; GFX-940-NEXT: v_add3_u32 v0, v0, v1, s5 -; GFX-940-NEXT: v_or_b32_e32 v1, 0x400000, v1 -; GFX-940-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[2:3] -; GFX-940-NEXT: s_mov_b32 s0, 0x7060302 -; GFX-940-NEXT: s_nop 0 -; GFX-940-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GFX-940-NEXT: v_perm_b32 v0, v0, v4, s0 -; GFX-940-NEXT: ; return to shader part epilog +; GFX-942-LABEL: v_test_cvt_v2f64_v2bf16_v: +; GFX-942: ; %bb.0: +; GFX-942-NEXT: v_cvt_f32_f64_e64 v6, |v[0:1]| +; GFX-942-NEXT: v_cvt_f64_f32_e32 v[4:5], v6 +; GFX-942-NEXT: v_and_b32_e32 v7, 1, v6 +; GFX-942-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5] +; GFX-942-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5] +; GFX-942-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7 +; GFX-942-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3] +; GFX-942-NEXT: v_add_u32_e32 v4, v6, v4 +; GFX-942-NEXT: s_or_b64 vcc, s[0:1], vcc +; GFX-942-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc +; GFX-942-NEXT: s_brev_b32 s4, 1 +; GFX-942-NEXT: v_and_or_b32 v5, v1, s4, v4 +; GFX-942-NEXT: v_bfe_u32 v4, v4, 16, 1 +; GFX-942-NEXT: s_movk_i32 s5, 0x7fff +; GFX-942-NEXT: v_add3_u32 v4, v4, v5, s5 +; GFX-942-NEXT: v_or_b32_e32 v5, 0x400000, v5 +; GFX-942-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1] +; GFX-942-NEXT: s_nop 1 +; GFX-942-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc +; GFX-942-NEXT: v_cvt_f32_f64_e64 v5, |v[2:3]| +; GFX-942-NEXT: v_cvt_f64_f32_e32 v[0:1], v5 +; GFX-942-NEXT: v_and_b32_e32 v6, 1, v5 +; GFX-942-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[2:3]|, v[0:1] +; GFX-942-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[2:3]|, v[0:1] +; GFX-942-NEXT: v_cmp_eq_u32_e32 vcc, 1, v6 +; GFX-942-NEXT: v_cndmask_b32_e64 v0, -1, 1, s[2:3] +; GFX-942-NEXT: v_add_u32_e32 v0, v5, v0 +; GFX-942-NEXT: s_or_b64 vcc, s[0:1], vcc +; GFX-942-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc +; GFX-942-NEXT: v_and_or_b32 v1, v3, s4, v0 +; GFX-942-NEXT: v_bfe_u32 v0, v0, 16, 1 +; GFX-942-NEXT: v_add3_u32 v0, v0, v1, s5 +; GFX-942-NEXT: v_or_b32_e32 v1, 0x400000, v1 +; GFX-942-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[2:3] +; GFX-942-NEXT: s_mov_b32 s0, 0x7060302 +; GFX-942-NEXT: s_nop 0 +; GFX-942-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX-942-NEXT: v_perm_b32 v0, v0, v4, s0 +; GFX-942-NEXT: ; return to shader part epilog ; ; GFX-950-LABEL: v_test_cvt_v2f64_v2bf16_v: ; GFX-950: ; %bb.0: @@ -163,24 +163,24 @@ define amdgpu_ps float @v_test_cvt_v2f64_v2bf16_v(<2 x double> %src) { } define amdgpu_ps float @fptrunc_f32_f32_to_v2bf16(float %a, float %b) { -; GFX-940-LABEL: fptrunc_f32_f32_to_v2bf16: -; GFX-940: ; %bb.0: ; %entry -; GFX-940-NEXT: v_bfe_u32 v2, v0, 16, 1 -; GFX-940-NEXT: s_movk_i32 s0, 0x7fff -; GFX-940-NEXT: v_add3_u32 v2, v2, v0, s0 -; GFX-940-NEXT: v_or_b32_e32 v3, 0x400000, v0 -; GFX-940-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX-940-NEXT: s_nop 1 -; GFX-940-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc -; GFX-940-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX-940-NEXT: v_add3_u32 v2, v2, v1, s0 -; GFX-940-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX-940-NEXT: v_cmp_u_f32_e32 vcc, v1, v1 -; GFX-940-NEXT: s_mov_b32 s0, 0x7060302 -; GFX-940-NEXT: s_nop 0 -; GFX-940-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc -; GFX-940-NEXT: v_perm_b32 v0, v1, v0, s0 -; GFX-940-NEXT: ; return to shader part epilog +; GFX-942-LABEL: fptrunc_f32_f32_to_v2bf16: +; GFX-942: ; %bb.0: ; %entry +; GFX-942-NEXT: v_bfe_u32 v2, v0, 16, 1 +; GFX-942-NEXT: s_movk_i32 s0, 0x7fff +; GFX-942-NEXT: v_add3_u32 v2, v2, v0, s0 +; GFX-942-NEXT: v_or_b32_e32 v3, 0x400000, v0 +; GFX-942-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX-942-NEXT: s_nop 1 +; GFX-942-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc +; GFX-942-NEXT: v_bfe_u32 v2, v1, 16, 1 +; GFX-942-NEXT: v_add3_u32 v2, v2, v1, s0 +; GFX-942-NEXT: v_or_b32_e32 v3, 0x400000, v1 +; GFX-942-NEXT: v_cmp_u_f32_e32 vcc, v1, v1 +; GFX-942-NEXT: s_mov_b32 s0, 0x7060302 +; GFX-942-NEXT: s_nop 0 +; GFX-942-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc +; GFX-942-NEXT: v_perm_b32 v0, v1, v0, s0 +; GFX-942-NEXT: ; return to shader part epilog ; ; GFX-950-LABEL: fptrunc_f32_f32_to_v2bf16: ; GFX-950: ; %bb.0: ; %entry @@ -196,26 +196,26 @@ entry: } define amdgpu_ps float @fptrunc_f32_f32_to_v2bf16_mods(float %a, float %b) { -; GFX-940-LABEL: fptrunc_f32_f32_to_v2bf16_mods: -; GFX-940: ; %bb.0: ; %entry -; GFX-940-NEXT: v_xor_b32_e32 v2, 0x80000000, v0 -; GFX-940-NEXT: v_bfe_u32 v3, v2, 16, 1 -; GFX-940-NEXT: s_movk_i32 s0, 0x7fff -; GFX-940-NEXT: v_add3_u32 v3, v3, v2, s0 -; GFX-940-NEXT: v_or_b32_e32 v2, 0x400000, v2 -; GFX-940-NEXT: v_cmp_u_f32_e64 vcc, -v0, -v0 -; GFX-940-NEXT: s_nop 1 -; GFX-940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX-940-NEXT: v_and_b32_e32 v2, 0x7fffffff, v1 -; GFX-940-NEXT: v_bfe_u32 v3, v2, 16, 1 -; GFX-940-NEXT: v_add3_u32 v3, v3, v2, s0 -; GFX-940-NEXT: v_or_b32_e32 v2, 0x400000, v2 -; GFX-940-NEXT: v_cmp_u_f32_e64 vcc, |v1|, |v1| -; GFX-940-NEXT: s_mov_b32 s0, 0x7060302 -; GFX-940-NEXT: s_nop 0 -; GFX-940-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc -; GFX-940-NEXT: v_perm_b32 v0, v1, v0, s0 -; GFX-940-NEXT: ; return to shader part epilog +; GFX-942-LABEL: fptrunc_f32_f32_to_v2bf16_mods: +; GFX-942: ; %bb.0: ; %entry +; GFX-942-NEXT: v_xor_b32_e32 v2, 0x80000000, v0 +; GFX-942-NEXT: v_bfe_u32 v3, v2, 16, 1 +; GFX-942-NEXT: s_movk_i32 s0, 0x7fff +; GFX-942-NEXT: v_add3_u32 v3, v3, v2, s0 +; GFX-942-NEXT: v_or_b32_e32 v2, 0x400000, v2 +; GFX-942-NEXT: v_cmp_u_f32_e64 vcc, -v0, -v0 +; GFX-942-NEXT: s_nop 1 +; GFX-942-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX-942-NEXT: v_and_b32_e32 v2, 0x7fffffff, v1 +; GFX-942-NEXT: v_bfe_u32 v3, v2, 16, 1 +; GFX-942-NEXT: v_add3_u32 v3, v3, v2, s0 +; GFX-942-NEXT: v_or_b32_e32 v2, 0x400000, v2 +; GFX-942-NEXT: v_cmp_u_f32_e64 vcc, |v1|, |v1| +; GFX-942-NEXT: s_mov_b32 s0, 0x7060302 +; GFX-942-NEXT: s_nop 0 +; GFX-942-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc +; GFX-942-NEXT: v_perm_b32 v0, v1, v0, s0 +; GFX-942-NEXT: ; return to shader part epilog ; ; GFX-950-LABEL: fptrunc_f32_f32_to_v2bf16_mods: ; GFX-950: ; %bb.0: ; %entry @@ -233,19 +233,19 @@ entry: } define amdgpu_ps void @fptrunc_f32_to_bf16(float %a, ptr %out) { -; GFX-940-LABEL: fptrunc_f32_to_bf16: -; GFX-940: ; %bb.0: ; %entry -; GFX-940-NEXT: v_mov_b32_e32 v3, v2 -; GFX-940-NEXT: v_mov_b32_e32 v2, v1 -; GFX-940-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX-940-NEXT: s_movk_i32 s0, 0x7fff -; GFX-940-NEXT: v_add3_u32 v1, v1, v0, s0 -; GFX-940-NEXT: v_or_b32_e32 v4, 0x400000, v0 -; GFX-940-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX-940-NEXT: s_nop 1 -; GFX-940-NEXT: v_cndmask_b32_e32 v0, v1, v4, vcc -; GFX-940-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1 -; GFX-940-NEXT: s_endpgm +; GFX-942-LABEL: fptrunc_f32_to_bf16: +; GFX-942: ; %bb.0: ; %entry +; GFX-942-NEXT: v_mov_b32_e32 v3, v2 +; GFX-942-NEXT: v_mov_b32_e32 v2, v1 +; GFX-942-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX-942-NEXT: s_movk_i32 s0, 0x7fff +; GFX-942-NEXT: v_add3_u32 v1, v1, v0, s0 +; GFX-942-NEXT: v_or_b32_e32 v4, 0x400000, v0 +; GFX-942-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX-942-NEXT: s_nop 1 +; GFX-942-NEXT: v_cndmask_b32_e32 v0, v1, v4, vcc +; GFX-942-NEXT: flat_store_short_d16_hi v[2:3], v0 +; GFX-942-NEXT: s_endpgm ; ; GFX-950-LABEL: fptrunc_f32_to_bf16: ; GFX-950: ; %bb.0: ; %entry @@ -261,20 +261,20 @@ entry: } define amdgpu_ps void @fptrunc_f32_to_bf16_abs(float %a, ptr %out) { -; GFX-940-LABEL: fptrunc_f32_to_bf16_abs: -; GFX-940: ; %bb.0: ; %entry -; GFX-940-NEXT: v_mov_b32_e32 v3, v2 -; GFX-940-NEXT: v_mov_b32_e32 v2, v1 -; GFX-940-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0 -; GFX-940-NEXT: v_bfe_u32 v4, v1, 16, 1 -; GFX-940-NEXT: s_movk_i32 s0, 0x7fff -; GFX-940-NEXT: v_add3_u32 v4, v4, v1, s0 -; GFX-940-NEXT: v_or_b32_e32 v1, 0x400000, v1 -; GFX-940-NEXT: v_cmp_u_f32_e64 vcc, |v0|, |v0| -; GFX-940-NEXT: s_nop 1 -; GFX-940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX-940-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1 -; GFX-940-NEXT: s_endpgm +; GFX-942-LABEL: fptrunc_f32_to_bf16_abs: +; GFX-942: ; %bb.0: ; %entry +; GFX-942-NEXT: v_mov_b32_e32 v3, v2 +; GFX-942-NEXT: v_mov_b32_e32 v2, v1 +; GFX-942-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0 +; GFX-942-NEXT: v_bfe_u32 v4, v1, 16, 1 +; GFX-942-NEXT: s_movk_i32 s0, 0x7fff +; GFX-942-NEXT: v_add3_u32 v4, v4, v1, s0 +; GFX-942-NEXT: v_or_b32_e32 v1, 0x400000, v1 +; GFX-942-NEXT: v_cmp_u_f32_e64 vcc, |v0|, |v0| +; GFX-942-NEXT: s_nop 1 +; GFX-942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX-942-NEXT: flat_store_short_d16_hi v[2:3], v0 +; GFX-942-NEXT: s_endpgm ; ; GFX-950-LABEL: fptrunc_f32_to_bf16_abs: ; GFX-950: ; %bb.0: ; %entry @@ -291,20 +291,20 @@ entry: } define amdgpu_ps void @fptrunc_f32_to_bf16_neg(float %a, ptr %out) { -; GFX-940-LABEL: fptrunc_f32_to_bf16_neg: -; GFX-940: ; %bb.0: ; %entry -; GFX-940-NEXT: v_mov_b32_e32 v3, v2 -; GFX-940-NEXT: v_mov_b32_e32 v2, v1 -; GFX-940-NEXT: v_xor_b32_e32 v1, 0x80000000, v0 -; GFX-940-NEXT: v_bfe_u32 v4, v1, 16, 1 -; GFX-940-NEXT: s_movk_i32 s0, 0x7fff -; GFX-940-NEXT: v_add3_u32 v4, v4, v1, s0 -; GFX-940-NEXT: v_or_b32_e32 v1, 0x400000, v1 -; GFX-940-NEXT: v_cmp_u_f32_e64 vcc, -v0, -v0 -; GFX-940-NEXT: s_nop 1 -; GFX-940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX-940-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1 -; GFX-940-NEXT: s_endpgm +; GFX-942-LABEL: fptrunc_f32_to_bf16_neg: +; GFX-942: ; %bb.0: ; %entry +; GFX-942-NEXT: v_mov_b32_e32 v3, v2 +; GFX-942-NEXT: v_mov_b32_e32 v2, v1 +; GFX-942-NEXT: v_xor_b32_e32 v1, 0x80000000, v0 +; GFX-942-NEXT: v_bfe_u32 v4, v1, 16, 1 +; GFX-942-NEXT: s_movk_i32 s0, 0x7fff +; GFX-942-NEXT: v_add3_u32 v4, v4, v1, s0 +; GFX-942-NEXT: v_or_b32_e32 v1, 0x400000, v1 +; GFX-942-NEXT: v_cmp_u_f32_e64 vcc, -v0, -v0 +; GFX-942-NEXT: s_nop 1 +; GFX-942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX-942-NEXT: flat_store_short_d16_hi v[2:3], v0 +; GFX-942-NEXT: s_endpgm ; ; GFX-950-LABEL: fptrunc_f32_to_bf16_neg: ; GFX-950: ; %bb.0: ; %entry @@ -321,29 +321,29 @@ entry: } define amdgpu_ps void @fptrunc_f64_to_bf16(double %a, ptr %out) { -; GFX-940-LABEL: fptrunc_f64_to_bf16: -; GFX-940: ; %bb.0: ; %entry -; GFX-940-NEXT: v_cvt_f32_f64_e64 v6, |v[0:1]| -; GFX-940-NEXT: v_cvt_f64_f32_e32 v[4:5], v6 -; GFX-940-NEXT: v_and_b32_e32 v7, 1, v6 -; GFX-940-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5] -; GFX-940-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5] -; GFX-940-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7 -; GFX-940-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3] -; GFX-940-NEXT: v_add_u32_e32 v4, v6, v4 -; GFX-940-NEXT: s_or_b64 vcc, s[0:1], vcc -; GFX-940-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX-940-NEXT: s_brev_b32 s0, 1 -; GFX-940-NEXT: v_and_or_b32 v5, v1, s0, v4 -; GFX-940-NEXT: v_bfe_u32 v4, v4, 16, 1 -; GFX-940-NEXT: s_movk_i32 s0, 0x7fff -; GFX-940-NEXT: v_add3_u32 v4, v4, v5, s0 -; GFX-940-NEXT: v_or_b32_e32 v5, 0x400000, v5 -; GFX-940-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1] -; GFX-940-NEXT: s_nop 1 -; GFX-940-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc -; GFX-940-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1 -; GFX-940-NEXT: s_endpgm +; GFX-942-LABEL: fptrunc_f64_to_bf16: +; GFX-942: ; %bb.0: ; %entry +; GFX-942-NEXT: v_cvt_f32_f64_e64 v6, |v[0:1]| +; GFX-942-NEXT: v_cvt_f64_f32_e32 v[4:5], v6 +; GFX-942-NEXT: v_and_b32_e32 v7, 1, v6 +; GFX-942-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5] +; GFX-942-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5] +; GFX-942-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7 +; GFX-942-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3] +; GFX-942-NEXT: v_add_u32_e32 v4, v6, v4 +; GFX-942-NEXT: s_or_b64 vcc, s[0:1], vcc +; GFX-942-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc +; GFX-942-NEXT: s_brev_b32 s0, 1 +; GFX-942-NEXT: v_and_or_b32 v5, v1, s0, v4 +; GFX-942-NEXT: v_bfe_u32 v4, v4, 16, 1 +; GFX-942-NEXT: s_movk_i32 s0, 0x7fff +; GFX-942-NEXT: v_add3_u32 v4, v4, v5, s0 +; GFX-942-NEXT: v_or_b32_e32 v5, 0x400000, v5 +; GFX-942-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1] +; GFX-942-NEXT: s_nop 1 +; GFX-942-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc +; GFX-942-NEXT: flat_store_short_d16_hi v[2:3], v0 +; GFX-942-NEXT: s_endpgm ; ; GFX-950-LABEL: fptrunc_f64_to_bf16: ; GFX-950: ; %bb.0: ; %entry @@ -358,30 +358,30 @@ entry: } define amdgpu_ps void @fptrunc_f64_to_bf16_neg(double %a, ptr %out) { -; GFX-940-LABEL: fptrunc_f64_to_bf16_neg: -; GFX-940: ; %bb.0: ; %entry -; GFX-940-NEXT: v_cvt_f32_f64_e64 v7, |v[0:1]| -; GFX-940-NEXT: v_cvt_f64_f32_e32 v[4:5], v7 -; GFX-940-NEXT: v_and_b32_e32 v8, 1, v7 -; GFX-940-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5] -; GFX-940-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5] -; GFX-940-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8 -; GFX-940-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3] -; GFX-940-NEXT: v_add_u32_e32 v4, v7, v4 -; GFX-940-NEXT: s_or_b64 vcc, s[0:1], vcc -; GFX-940-NEXT: s_brev_b32 s4, 1 -; GFX-940-NEXT: v_xor_b32_e32 v6, 0x80000000, v1 -; GFX-940-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc -; GFX-940-NEXT: v_and_or_b32 v5, v6, s4, v4 -; GFX-940-NEXT: v_bfe_u32 v4, v4, 16, 1 -; GFX-940-NEXT: s_movk_i32 s0, 0x7fff -; GFX-940-NEXT: v_add3_u32 v4, v4, v5, s0 -; GFX-940-NEXT: v_or_b32_e32 v5, 0x400000, v5 -; GFX-940-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], -v[0:1] -; GFX-940-NEXT: s_nop 1 -; GFX-940-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc -; GFX-940-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1 -; GFX-940-NEXT: s_endpgm +; GFX-942-LABEL: fptrunc_f64_to_bf16_neg: +; GFX-942: ; %bb.0: ; %entry +; GFX-942-NEXT: v_cvt_f32_f64_e64 v7, |v[0:1]| +; GFX-942-NEXT: v_cvt_f64_f32_e32 v[4:5], v7 +; GFX-942-NEXT: v_and_b32_e32 v8, 1, v7 +; GFX-942-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5] +; GFX-942-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5] +; GFX-942-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8 +; GFX-942-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3] +; GFX-942-NEXT: v_add_u32_e32 v4, v7, v4 +; GFX-942-NEXT: s_or_b64 vcc, s[0:1], vcc +; GFX-942-NEXT: s_brev_b32 s4, 1 +; GFX-942-NEXT: v_xor_b32_e32 v6, 0x80000000, v1 +; GFX-942-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GFX-942-NEXT: v_and_or_b32 v5, v6, s4, v4 +; GFX-942-NEXT: v_bfe_u32 v4, v4, 16, 1 +; GFX-942-NEXT: s_movk_i32 s0, 0x7fff +; GFX-942-NEXT: v_add3_u32 v4, v4, v5, s0 +; GFX-942-NEXT: v_or_b32_e32 v5, 0x400000, v5 +; GFX-942-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], -v[0:1] +; GFX-942-NEXT: s_nop 1 +; GFX-942-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc +; GFX-942-NEXT: flat_store_short_d16_hi v[2:3], v0 +; GFX-942-NEXT: s_endpgm ; ; GFX-950-LABEL: fptrunc_f64_to_bf16_neg: ; GFX-950: ; %bb.0: ; %entry @@ -397,30 +397,30 @@ entry: } define amdgpu_ps void @fptrunc_f64_to_bf16_abs(double %a, ptr %out) { -; GFX-940-LABEL: fptrunc_f64_to_bf16_abs: -; GFX-940: ; %bb.0: ; %entry -; GFX-940-NEXT: v_cvt_f32_f64_e64 v7, |v[0:1]| -; GFX-940-NEXT: v_cvt_f64_f32_e32 v[4:5], v7 -; GFX-940-NEXT: v_and_b32_e32 v8, 1, v7 -; GFX-940-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5] -; GFX-940-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5] -; GFX-940-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8 -; GFX-940-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3] -; GFX-940-NEXT: v_add_u32_e32 v4, v7, v4 -; GFX-940-NEXT: s_or_b64 vcc, s[0:1], vcc -; GFX-940-NEXT: v_and_b32_e32 v6, 0x7fffffff, v1 -; GFX-940-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc -; GFX-940-NEXT: s_brev_b32 s0, 1 -; GFX-940-NEXT: v_and_or_b32 v5, v6, s0, v4 -; GFX-940-NEXT: v_bfe_u32 v4, v4, 16, 1 -; GFX-940-NEXT: s_movk_i32 s0, 0x7fff -; GFX-940-NEXT: v_add3_u32 v4, v4, v5, s0 -; GFX-940-NEXT: v_or_b32_e32 v5, 0x400000, v5 -; GFX-940-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, |v[0:1]| -; GFX-940-NEXT: s_nop 1 -; GFX-940-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc -; GFX-940-NEXT: flat_store_short_d16_hi v[2:3], v0 sc0 sc1 -; GFX-940-NEXT: s_endpgm +; GFX-942-LABEL: fptrunc_f64_to_bf16_abs: +; GFX-942: ; %bb.0: ; %entry +; GFX-942-NEXT: v_cvt_f32_f64_e64 v7, |v[0:1]| +; GFX-942-NEXT: v_cvt_f64_f32_e32 v[4:5], v7 +; GFX-942-NEXT: v_and_b32_e32 v8, 1, v7 +; GFX-942-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5] +; GFX-942-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5] +; GFX-942-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8 +; GFX-942-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3] +; GFX-942-NEXT: v_add_u32_e32 v4, v7, v4 +; GFX-942-NEXT: s_or_b64 vcc, s[0:1], vcc +; GFX-942-NEXT: v_and_b32_e32 v6, 0x7fffffff, v1 +; GFX-942-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GFX-942-NEXT: s_brev_b32 s0, 1 +; GFX-942-NEXT: v_and_or_b32 v5, v6, s0, v4 +; GFX-942-NEXT: v_bfe_u32 v4, v4, 16, 1 +; GFX-942-NEXT: s_movk_i32 s0, 0x7fff +; GFX-942-NEXT: v_add3_u32 v4, v4, v5, s0 +; GFX-942-NEXT: v_or_b32_e32 v5, 0x400000, v5 +; GFX-942-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, |v[0:1]| +; GFX-942-NEXT: s_nop 1 +; GFX-942-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc +; GFX-942-NEXT: flat_store_short_d16_hi v[2:3], v0 +; GFX-942-NEXT: s_endpgm ; ; GFX-950-LABEL: fptrunc_f64_to_bf16_abs: ; GFX-950: ; %bb.0: ; %entry diff --git a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-no-rtn.ll index 85a701b23a6cf..a14114358433a 100644 --- a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-no-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-no-rtn.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX908_GFX11 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX908_GFX11 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX908_GFX11 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX908_GFX11 %s define amdgpu_ps void @buffer_atomic_fadd_f32_offset_no_rtn(float %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) { @@ -23,19 +23,19 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_offset_no_rtn(float %val, <4 x i32 ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_offset_no_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f32_offset_no_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret void } @@ -56,20 +56,20 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_offen_no_rtn(float %val, <4 x i32> ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_offen_no_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f32_offen_no_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void } @@ -90,20 +90,20 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_idxen_no_rtn(float %val, <4 x i32> ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_idxen_no_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f32_idxen_no_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void } @@ -126,22 +126,22 @@ define amdgpu_ps void @buffer_atomic_fadd_f32_bothen_no_rtn(float %val, <4 x i32 ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_bothen_no_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f32_bothen_no_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret void } @@ -167,25 +167,25 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_offset_no_rtn(float %val, ptr ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_no_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_no_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0) ret void } @@ -212,26 +212,26 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_offen_no_rtn(float %val, ptr a ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_no_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_no_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void } @@ -258,26 +258,26 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_idxen_no_rtn(float %val, ptr a ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_no_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_no_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void } @@ -306,28 +306,28 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_bothen_no_rtn(float %val, ptr ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_no_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_no_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-rtn.ll index 417dee573c5da..eb452dc4b874f 100644 --- a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-rtn.ll @@ -1,26 +1,26 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX11 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX11 %s define amdgpu_ps float @buffer_atomic_fadd_f32_offset_rtn(float %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_offset_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f32_offset_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; ; GFX11-LABEL: name: buffer_atomic_fadd_f32_offset_rtn ; GFX11: bb.0 (%ir-block.0): @@ -41,21 +41,21 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_offset_rtn(float %val, <4 x i32> } define amdgpu_ps float @buffer_atomic_fadd_f32_offen_rtn(float %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_offen_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f32_offen_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; ; GFX11-LABEL: name: buffer_atomic_fadd_f32_offen_rtn ; GFX11: bb.0 (%ir-block.0): @@ -77,21 +77,21 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_offen_rtn(float %val, <4 x i32> i } define amdgpu_ps float @buffer_atomic_fadd_f32_idxen_rtn(float %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_idxen_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f32_idxen_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; ; GFX11-LABEL: name: buffer_atomic_fadd_f32_idxen_rtn ; GFX11: bb.0 (%ir-block.0): @@ -113,23 +113,23 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_idxen_rtn(float %val, <4 x i32> i } define amdgpu_ps float @buffer_atomic_fadd_f32_bothen_rtn(float %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_bothen_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f32_bothen_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; ; GFX11-LABEL: name: buffer_atomic_fadd_f32_bothen_rtn ; GFX11: bb.0 (%ir-block.0): @@ -153,26 +153,26 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_bothen_rtn(float %val, <4 x i32> } define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_offset_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; ; GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_rtn ; GFX11: bb.0 (%ir-block.0): @@ -199,27 +199,27 @@ define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_offset_rtn(float %val, ptr ad } define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_offen_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; ; GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_rtn ; GFX11: bb.0 (%ir-block.0): @@ -247,27 +247,27 @@ define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_offen_rtn(float %val, ptr add } define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_idxen_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; ; GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_rtn ; GFX11: bb.0 (%ir-block.0): @@ -295,29 +295,29 @@ define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_idxen_rtn(float %val, ptr add } define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_bothen_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; ; GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_rtn ; GFX11: bb.0 (%ir-block.0): diff --git a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll index ff087715e0fc0..6885657bbfa36 100644 --- a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll @@ -1,441 +1,441 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s define amdgpu_ps void @buffer_atomic_fadd_f64_offset_no_rtn(double %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_offset_no_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_OFFSET killed [[COPY7]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f64_offset_no_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_OFFSET killed [[COPY7]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret void } define amdgpu_ps void @buffer_atomic_fadd_f64_offen_no_rtn(double %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_offen_no_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_OFFEN killed [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f64_offen_no_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_OFFEN killed [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void } define amdgpu_ps void @buffer_atomic_fadd_f64_idxen_no_rtn(double %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_idxen_no_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_IDXEN killed [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f64_idxen_no_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_IDXEN killed [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void } define amdgpu_ps void @buffer_atomic_fadd_f64_bothen_no_rtn(double %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_bothen_no_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_BOTHEN killed [[COPY9]], killed [[REG_SEQUENCE2]], killed [[REG_SEQUENCE]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f64_bothen_no_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_BOTHEN killed [[COPY9]], killed [[REG_SEQUENCE2]], killed [[REG_SEQUENCE]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret void } define amdgpu_ps double @buffer_atomic_fadd_f64_offset_rtn(double %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_offset_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[COPY7]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1 - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY8]] - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY9]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f64_offset_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[COPY7]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0 + ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1 + ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[COPY8]] + ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[COPY9]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret double %ret } define amdgpu_ps double @buffer_atomic_fadd_f64_offen_rtn(double %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_offen_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1 - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY9]] - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY10]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f64_offen_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0 + ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1 + ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[COPY9]] + ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[COPY10]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret double %ret } define amdgpu_ps double @buffer_atomic_fadd_f64_idxen_rtn(double %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_idxen_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1 - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY9]] - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY10]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f64_idxen_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0 + ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1 + ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[COPY9]] + ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[COPY10]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret double %ret } define amdgpu_ps double @buffer_atomic_fadd_f64_bothen_rtn(double %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_bothen_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[COPY9]], killed [[REG_SEQUENCE2]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1 - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY10]] - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY11]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f64_bothen_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[COPY9]], killed [[REG_SEQUENCE2]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0 + ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1 + ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[COPY10]] + ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[COPY11]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret double %ret } define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_offset_no_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_no_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY11:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]] - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_OFFSET killed [[COPY11]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_no_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]] + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_OFFSET killed [[COPY11]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0) ret void } define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_offen_no_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_no_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]] - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_OFFEN killed [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_no_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]] + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_OFFEN killed [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void } define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_idxen_no_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_no_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]] - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_IDXEN killed [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_no_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]] + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_IDXEN killed [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void } define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_bothen_no_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_no_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY12]], %subreg.sub0, killed [[COPY11]], %subreg.sub1, killed [[COPY10]], %subreg.sub2, killed [[COPY9]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]] - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_BOTHEN killed [[COPY13]], killed [[REG_SEQUENCE4]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_no_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY12]], %subreg.sub0, killed [[COPY11]], %subreg.sub1, killed [[COPY10]], %subreg.sub2, killed [[COPY9]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]] + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_BOTHEN killed [[COPY13]], killed [[REG_SEQUENCE4]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret void } define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_offset_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY11:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]] - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[COPY11]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1 - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY12]] - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY13]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]] + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[COPY11]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0 + ; GFX90A_GFX942-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1 + ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[COPY12]] + ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[COPY13]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0) ret double %ret } define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_offen_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]] - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1 - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY13]] - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY14]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]] + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0 + ; GFX90A_GFX942-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1 + ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[COPY13]] + ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[COPY14]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret double %ret } define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_idxen_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]] - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1 - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY13]] - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY14]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]] + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0 + ; GFX90A_GFX942-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1 + ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[COPY13]] + ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[COPY14]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret double %ret } define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_bothen_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY12]], %subreg.sub0, killed [[COPY11]], %subreg.sub1, killed [[COPY10]], %subreg.sub2, killed [[COPY9]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]] - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[COPY13]], killed [[REG_SEQUENCE4]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1 - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY14]] - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY15]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY12]], %subreg.sub0, killed [[COPY11]], %subreg.sub1, killed [[COPY10]], %subreg.sub2, killed [[COPY9]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]] + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[COPY13]], killed [[REG_SEQUENCE4]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0 + ; GFX90A_GFX942-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1 + ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[COPY14]] + ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[COPY15]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret double %ret } diff --git a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-no-rtn.ll index 44fddc372293f..790cd8ef9eccf 100644 --- a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-no-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-no-rtn.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX908 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX908 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s define amdgpu_ps void @buffer_atomic_fadd_v2f16_offset_no_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) { ; GFX908-LABEL: name: buffer_atomic_fadd_v2f16_offset_no_rtn @@ -21,19 +21,19 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_offset_no_rtn(<2 x half> %val, < ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_offset_no_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_offset_no_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) ret void } @@ -54,20 +54,20 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_offen_no_rtn(<2 x half> %val, <4 ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_offen_no_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_offen_no_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void } @@ -88,20 +88,20 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_idxen_no_rtn(<2 x half> %val, <4 ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_idxen_no_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_idxen_no_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void } @@ -124,22 +124,22 @@ define amdgpu_ps void @buffer_atomic_fadd_v2f16_bothen_no_rtn(<2 x half> %val, < ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_bothen_no_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_bothen_no_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret void } @@ -165,25 +165,25 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_offset_no_rtn(<2 x half> %va ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_no_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_no_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 4095, i32 %soffset, i32 0) ret void } @@ -210,26 +210,26 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_offen_no_rtn(<2 x half> %val ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_no_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_no_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void } @@ -256,26 +256,26 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_idxen_no_rtn(<2 x half> %val ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_no_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_no_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void } @@ -304,28 +304,28 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_bothen_no_rtn(<2 x half> %va ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_no_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_no_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-rtn.ll b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-rtn.ll index c4ef1390a288f..89e1a4be4e16c 100644 --- a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-rtn.ll @@ -1,191 +1,191 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_offset_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_offset_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_offset_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret <2 x half> %ret } define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_offen_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_offen_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_offen_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret <2 x half> %ret } define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_idxen_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_idxen_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_idxen_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret <2 x half> %ret } define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_bothen_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_bothen_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_bothen_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret <2 x half> %ret } define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_offset_rtn(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0) ret <2 x half> %ret } define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_offen_rtn(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret <2 x half> %ret } define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_idxen_rtn(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret <2 x half> %ret } define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_bothen_rtn(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret <2 x half> %ret } diff --git a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fadd.ll b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fadd.ll index 0ea73ad4c5019..ba2694fca99fa 100644 --- a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fadd.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s -; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s @@ -28,15 +28,15 @@ define float @buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_fine_g ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, s16 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, s16 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: @@ -216,15 +216,15 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_f32__offset__amdgpu_no_fine_ ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_f32__offset__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, s16 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_f32__offset__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, s16 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_f32__offset__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: @@ -403,33 +403,33 @@ define float @buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__waterfall__amdgp ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__waterfall__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[2:3], exec -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: v_readfirstlane_b32 s4, v0 -; GFX940-NEXT: v_readfirstlane_b32 s5, v1 -; GFX940-NEXT: v_readfirstlane_b32 s6, v2 -; GFX940-NEXT: v_readfirstlane_b32 s7, v3 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] -; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_atomic_add_f32 v5, v4, s[4:7], 0 offen offset:1024 sc0 -; GFX940-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 -; GFX940-NEXT: ; implicit-def: $vgpr4 -; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB2_1 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: s_mov_b64 exec, s[2:3] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__waterfall__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[2:3], exec +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: v_readfirstlane_b32 s4, v0 +; GFX942-NEXT: v_readfirstlane_b32 s5, v1 +; GFX942-NEXT: v_readfirstlane_b32 s6, v2 +; GFX942-NEXT: v_readfirstlane_b32 s7, v3 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] +; GFX942-NEXT: s_and_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_atomic_add_f32 v5, v4, s[4:7], 0 offen offset:1024 sc0 +; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 +; GFX942-NEXT: ; implicit-def: $vgpr4 +; GFX942-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB2_1 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: s_mov_b64 exec, s[2:3] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__waterfall__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: @@ -799,15 +799,15 @@ define float @buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_fine_g ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, s16 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, s16 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -1004,15 +1004,15 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_f32__offset__amdgpu_no_fine_ ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_f32__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, s16 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_f32__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, s16 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_f32__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -1203,15 +1203,15 @@ define float @buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset(ptr addrspace(7) ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, s16 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, s16 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset: ; GFX11: ; %bb.0: @@ -1427,15 +1427,15 @@ define float @buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_remote ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, s16 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, s16 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -1651,15 +1651,15 @@ define float @buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_remote ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, s16 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, s16 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: @@ -1900,15 +1900,15 @@ define double @buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__amdgpu_no_fine_ ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, s16 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, s16 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -2149,15 +2149,15 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_f64__offset__amdgpu_no_fine_ ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_f64__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, s16 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_f64__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, s16 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_f64__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -2428,36 +2428,36 @@ define double @buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__waterfall__amdg ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__waterfall__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v6 -; GFX940-NEXT: v_mov_b32_e32 v6, v5 -; GFX940-NEXT: s_mov_b64 s[2:3], exec -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: v_readfirstlane_b32 s4, v0 -; GFX940-NEXT: v_readfirstlane_b32 s5, v1 -; GFX940-NEXT: v_readfirstlane_b32 s6, v2 -; GFX940-NEXT: v_readfirstlane_b32 s7, v3 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] -; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_atomic_add_f64 v[6:7], v4, s[4:7], 0 offen offset:2048 sc0 -; GFX940-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 -; GFX940-NEXT: ; implicit-def: $vgpr4 -; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB10_1 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: s_mov_b64 exec, s[2:3] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__waterfall__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v6 +; GFX942-NEXT: v_mov_b32_e32 v6, v5 +; GFX942-NEXT: s_mov_b64 s[2:3], exec +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: v_readfirstlane_b32 s4, v0 +; GFX942-NEXT: v_readfirstlane_b32 s5, v1 +; GFX942-NEXT: v_readfirstlane_b32 s6, v2 +; GFX942-NEXT: v_readfirstlane_b32 s7, v3 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] +; GFX942-NEXT: s_and_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_atomic_add_f64 v[6:7], v4, s[4:7], 0 offen offset:2048 sc0 +; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 +; GFX942-NEXT: ; implicit-def: $vgpr4 +; GFX942-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB10_1 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: s_mov_b64 exec, s[2:3] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__waterfall__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -2920,15 +2920,15 @@ define double @buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__amdgpu_no_remot ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, s16 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, s16 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -3189,15 +3189,15 @@ define double @buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__amdgpu_no_fine_ ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, s16 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, s16 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -3456,39 +3456,39 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__amdgpu_no_fine_gr ; GFX12-NEXT: v_lshrrev_b32_e32 v0, s4, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_addk_i32 s16, 0x200 -; GFX940-NEXT: s_and_b32 s4, s16, -4 -; GFX940-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen -; GFX940-NEXT: s_and_b32 s4, s16, 3 -; GFX940-NEXT: s_lshl_b32 s6, s4, 3 -; GFX940-NEXT: s_lshl_b32 s4, 0xffff, s6 -; GFX940-NEXT: s_not_b32 s7, s4 -; GFX940-NEXT: s_mov_b64 s[4:5], 0 -; GFX940-NEXT: .LBB13_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v2, s6, v3 -; GFX940-NEXT: v_add_f16_e32 v2, v2, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, s6, v2 -; GFX940-NEXT: v_and_or_b32 v2, v3, s7, v2 -; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[2:3] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v1, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GFX940-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_cbranch_execnz .LBB13_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, s6, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_addk_i32 s16, 0x200 +; GFX942-NEXT: s_and_b32 s4, s16, -4 +; GFX942-NEXT: v_mov_b32_e32 v1, s4 +; GFX942-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen +; GFX942-NEXT: s_and_b32 s4, s16, 3 +; GFX942-NEXT: s_lshl_b32 s6, s4, 3 +; GFX942-NEXT: s_lshl_b32 s4, 0xffff, s6 +; GFX942-NEXT: s_not_b32 s7, s4 +; GFX942-NEXT: s_mov_b64 s[4:5], 0 +; GFX942-NEXT: .LBB13_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v2, s6, v3 +; GFX942-NEXT: v_add_f16_e32 v2, v2, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, s6, v2 +; GFX942-NEXT: v_and_or_b32 v2, v3, s7, v2 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_cmpswap v[4:5], v1, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 +; GFX942-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_cbranch_execnz .LBB13_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, s6, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -3804,38 +3804,38 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_f16__offset__amdgpu_no_fine_ ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_f16__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_addk_i32 s16, 0x200 -; GFX940-NEXT: s_and_b32 s4, s16, -4 -; GFX940-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen -; GFX940-NEXT: s_and_b32 s4, s16, 3 -; GFX940-NEXT: s_lshl_b32 s6, s4, 3 -; GFX940-NEXT: s_lshl_b32 s4, 0xffff, s6 -; GFX940-NEXT: s_not_b32 s7, s4 -; GFX940-NEXT: s_mov_b64 s[4:5], 0 -; GFX940-NEXT: .LBB14_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v2, s6, v3 -; GFX940-NEXT: v_add_f16_e32 v2, v2, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, s6, v2 -; GFX940-NEXT: v_and_or_b32 v2, v3, s7, v2 -; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[2:3] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v1, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GFX940-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_cbranch_execnz .LBB14_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_f16__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_addk_i32 s16, 0x200 +; GFX942-NEXT: s_and_b32 s4, s16, -4 +; GFX942-NEXT: v_mov_b32_e32 v1, s4 +; GFX942-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen +; GFX942-NEXT: s_and_b32 s4, s16, 3 +; GFX942-NEXT: s_lshl_b32 s6, s4, 3 +; GFX942-NEXT: s_lshl_b32 s4, 0xffff, s6 +; GFX942-NEXT: s_not_b32 s7, s4 +; GFX942-NEXT: s_mov_b64 s[4:5], 0 +; GFX942-NEXT: .LBB14_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v2, s6, v3 +; GFX942-NEXT: v_add_f16_e32 v2, v2, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, s6, v2 +; GFX942-NEXT: v_and_or_b32 v2, v3, s7, v2 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_cmpswap v[4:5], v1, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 +; GFX942-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_cbranch_execnz .LBB14_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_f16__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -4179,72 +4179,72 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v4, v8 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_u32_e32 v4, 0x200, v4 -; GFX940-NEXT: v_and_b32_e32 v10, -4, v4 -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v6, v4, s0 -; GFX940-NEXT: v_not_b32_e32 v11, v6 -; GFX940-NEXT: s_mov_b64 s[2:3], exec -; GFX940-NEXT: .LBB15_1: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: v_readfirstlane_b32 s4, v0 -; GFX940-NEXT: v_readfirstlane_b32 s5, v1 -; GFX940-NEXT: v_readfirstlane_b32 s6, v2 -; GFX940-NEXT: v_readfirstlane_b32 s7, v3 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] -; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: buffer_load_dword v7, v10, s[4:7], 0 offen -; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB15_1 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: s_mov_b64 exec, s[2:3] -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: .LBB15_3: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Loop Header: Depth=1 -; GFX940-NEXT: ; Child Loop BB15_4 Depth 2 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v6, v4, v7 -; GFX940-NEXT: v_add_f16_e32 v6, v6, v5 -; GFX940-NEXT: v_lshlrev_b32_e32 v6, v4, v6 -; GFX940-NEXT: v_and_or_b32 v6, v7, v11, v6 -; GFX940-NEXT: s_mov_b64 s[8:9], exec -; GFX940-NEXT: v_mov_b64_e32 v[8:9], v[6:7] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: .LBB15_4: ; Parent Loop BB15_3 Depth=1 -; GFX940-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX940-NEXT: v_readfirstlane_b32 s4, v0 -; GFX940-NEXT: v_readfirstlane_b32 s5, v1 -; GFX940-NEXT: v_readfirstlane_b32 s6, v2 -; GFX940-NEXT: v_readfirstlane_b32 s7, v3 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] -; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_atomic_cmpswap v[8:9], v10, s[4:7], 0 offen sc0 -; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB15_4 -; GFX940-NEXT: ; %bb.5: ; in Loop: Header=BB15_3 Depth=1 -; GFX940-NEXT: s_mov_b64 exec, s[8:9] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v8, v7 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v7, v8 -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB15_3 -; GFX940-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v4, v8 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_u32_e32 v4, 0x200, v4 +; GFX942-NEXT: v_and_b32_e32 v10, -4, v4 +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v6, v4, s0 +; GFX942-NEXT: v_not_b32_e32 v11, v6 +; GFX942-NEXT: s_mov_b64 s[2:3], exec +; GFX942-NEXT: .LBB15_1: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: v_readfirstlane_b32 s4, v0 +; GFX942-NEXT: v_readfirstlane_b32 s5, v1 +; GFX942-NEXT: v_readfirstlane_b32 s6, v2 +; GFX942-NEXT: v_readfirstlane_b32 s7, v3 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] +; GFX942-NEXT: s_and_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: buffer_load_dword v7, v10, s[4:7], 0 offen +; GFX942-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB15_1 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: s_mov_b64 exec, s[2:3] +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: .LBB15_3: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Loop Header: Depth=1 +; GFX942-NEXT: ; Child Loop BB15_4 Depth 2 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v6, v4, v7 +; GFX942-NEXT: v_add_f16_e32 v6, v6, v5 +; GFX942-NEXT: v_lshlrev_b32_e32 v6, v4, v6 +; GFX942-NEXT: v_and_or_b32 v6, v7, v11, v6 +; GFX942-NEXT: s_mov_b64 s[8:9], exec +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[6:7] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: .LBB15_4: ; Parent Loop BB15_3 Depth=1 +; GFX942-NEXT: ; => This Inner Loop Header: Depth=2 +; GFX942-NEXT: v_readfirstlane_b32 s4, v0 +; GFX942-NEXT: v_readfirstlane_b32 s5, v1 +; GFX942-NEXT: v_readfirstlane_b32 s6, v2 +; GFX942-NEXT: v_readfirstlane_b32 s7, v3 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] +; GFX942-NEXT: s_and_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_atomic_cmpswap v[8:9], v10, s[4:7], 0 offen sc0 +; GFX942-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB15_4 +; GFX942-NEXT: ; %bb.5: ; in Loop: Header=BB15_3 Depth=1 +; GFX942-NEXT: s_mov_b64 exec, s[8:9] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v8, v7 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v7, v8 +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB15_3 +; GFX942-NEXT: ; %bb.6: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v4, v8 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -4798,47 +4798,47 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__amdgpu_no_fine ; GFX12-NEXT: v_lshrrev_b32_e32 v0, s4, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_addk_i32 s16, 0x200 -; GFX940-NEXT: s_and_b32 s4, s16, -4 -; GFX940-NEXT: v_mov_b32_e32 v4, s4 -; GFX940-NEXT: buffer_load_dword v1, v4, s[0:3], 0 offen -; GFX940-NEXT: s_and_b32 s4, s16, 3 -; GFX940-NEXT: s_lshl_b32 s6, s4, 3 -; GFX940-NEXT: s_lshl_b32 s4, 0xffff, s6 -; GFX940-NEXT: s_not_b32 s7, s4 -; GFX940-NEXT: s_mov_b64 s[4:5], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v5, 16, v0 -; GFX940-NEXT: s_movk_i32 s8, 0x7fff -; GFX940-NEXT: .LBB16_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v0, s6, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: v_add_f32_e32 v0, v0, v5 -; GFX940-NEXT: v_bfe_u32 v2, v0, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v3, 0x400000, v0 -; GFX940-NEXT: v_add3_u32 v2, v2, v0, s8 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v0, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v0, v1, s7, v0 -; GFX940-NEXT: v_mov_b64_e32 v[2:3], v[0:1] -; GFX940-NEXT: buffer_atomic_cmpswap v[2:3], v4, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 -; GFX940-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_cbranch_execnz .LBB16_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, s6, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_addk_i32 s16, 0x200 +; GFX942-NEXT: s_and_b32 s4, s16, -4 +; GFX942-NEXT: v_mov_b32_e32 v4, s4 +; GFX942-NEXT: buffer_load_dword v1, v4, s[0:3], 0 offen +; GFX942-NEXT: s_and_b32 s4, s16, 3 +; GFX942-NEXT: s_lshl_b32 s6, s4, 3 +; GFX942-NEXT: s_lshl_b32 s4, 0xffff, s6 +; GFX942-NEXT: s_not_b32 s7, s4 +; GFX942-NEXT: s_mov_b64 s[4:5], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v5, 16, v0 +; GFX942-NEXT: s_movk_i32 s8, 0x7fff +; GFX942-NEXT: .LBB16_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v0, s6, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: v_add_f32_e32 v0, v0, v5 +; GFX942-NEXT: v_bfe_u32 v2, v0, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v3, 0x400000, v0 +; GFX942-NEXT: v_add3_u32 v2, v2, v0, s8 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v0, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v0, v1, s7, v0 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1] +; GFX942-NEXT: buffer_atomic_cmpswap v[2:3], v4, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 +; GFX942-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_cbranch_execnz .LBB16_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, s6, v2 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -5202,46 +5202,46 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_bf16__offset__amdgpu_no_fine ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_bf16__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_addk_i32 s16, 0x200 -; GFX940-NEXT: s_and_b32 s4, s16, -4 -; GFX940-NEXT: v_mov_b32_e32 v2, s4 -; GFX940-NEXT: buffer_load_dword v1, v2, s[0:3], 0 offen -; GFX940-NEXT: s_and_b32 s4, s16, 3 -; GFX940-NEXT: s_lshl_b32 s6, s4, 3 -; GFX940-NEXT: s_lshl_b32 s4, 0xffff, s6 -; GFX940-NEXT: s_not_b32 s7, s4 -; GFX940-NEXT: s_mov_b64 s[4:5], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX940-NEXT: s_movk_i32 s8, 0x7fff -; GFX940-NEXT: .LBB17_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v0, s6, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: v_add_f32_e32 v0, v0, v3 -; GFX940-NEXT: v_bfe_u32 v4, v0, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v5, 0x400000, v0 -; GFX940-NEXT: v_add3_u32 v4, v4, v0, s8 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v0, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v0, v1, s7, v0 -; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[0:1] -; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1 -; GFX940-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_cbranch_execnz .LBB17_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_bf16__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_addk_i32 s16, 0x200 +; GFX942-NEXT: s_and_b32 s4, s16, -4 +; GFX942-NEXT: v_mov_b32_e32 v2, s4 +; GFX942-NEXT: buffer_load_dword v1, v2, s[0:3], 0 offen +; GFX942-NEXT: s_and_b32 s4, s16, 3 +; GFX942-NEXT: s_lshl_b32 s6, s4, 3 +; GFX942-NEXT: s_lshl_b32 s4, 0xffff, s6 +; GFX942-NEXT: s_not_b32 s7, s4 +; GFX942-NEXT: s_mov_b64 s[4:5], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v0 +; GFX942-NEXT: s_movk_i32 s8, 0x7fff +; GFX942-NEXT: .LBB17_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v0, s6, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: v_add_f32_e32 v0, v0, v3 +; GFX942-NEXT: v_bfe_u32 v4, v0, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v5, 0x400000, v0 +; GFX942-NEXT: v_add3_u32 v4, v4, v0, s8 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v0, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v0, v1, s7, v0 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] +; GFX942-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1 +; GFX942-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_cbranch_execnz .LBB17_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_bf16__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -5634,80 +5634,80 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall__amd ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_u32_e32 v4, 0x200, v4 -; GFX940-NEXT: v_and_b32_e32 v9, -4, v4 -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v8, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v8, s0 -; GFX940-NEXT: v_not_b32_e32 v10, v4 -; GFX940-NEXT: s_mov_b64 s[2:3], exec -; GFX940-NEXT: .LBB18_1: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: v_readfirstlane_b32 s4, v0 -; GFX940-NEXT: v_readfirstlane_b32 s5, v1 -; GFX940-NEXT: v_readfirstlane_b32 s6, v2 -; GFX940-NEXT: v_readfirstlane_b32 s7, v3 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] -; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: buffer_load_dword v7, v9, s[4:7], 0 offen -; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB18_1 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: s_mov_b64 exec, s[2:3] -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v11, 16, v5 -; GFX940-NEXT: s_movk_i32 s10, 0x7fff -; GFX940-NEXT: .LBB18_3: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Loop Header: Depth=1 -; GFX940-NEXT: ; Child Loop BB18_4 Depth 2 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v4, v8, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_mov_b64 s[8:9], exec -; GFX940-NEXT: v_add_f32_e32 v4, v4, v11 -; GFX940-NEXT: v_bfe_u32 v5, v4, 16, 1 -; GFX940-NEXT: v_add3_u32 v5, v5, v4, s10 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v6, v7, v10, v4 -; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[6:7] -; GFX940-NEXT: .LBB18_4: ; Parent Loop BB18_3 Depth=1 -; GFX940-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX940-NEXT: v_readfirstlane_b32 s4, v0 -; GFX940-NEXT: v_readfirstlane_b32 s5, v1 -; GFX940-NEXT: v_readfirstlane_b32 s6, v2 -; GFX940-NEXT: v_readfirstlane_b32 s7, v3 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] -; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v9, s[4:7], 0 offen sc0 -; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB18_4 -; GFX940-NEXT: ; %bb.5: ; in Loop: Header=BB18_3 Depth=1 -; GFX940-NEXT: s_mov_b64 exec, s[8:9] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v7, v4 -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB18_3 -; GFX940-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v8, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_u32_e32 v4, 0x200, v4 +; GFX942-NEXT: v_and_b32_e32 v9, -4, v4 +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v8, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v8, s0 +; GFX942-NEXT: v_not_b32_e32 v10, v4 +; GFX942-NEXT: s_mov_b64 s[2:3], exec +; GFX942-NEXT: .LBB18_1: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: v_readfirstlane_b32 s4, v0 +; GFX942-NEXT: v_readfirstlane_b32 s5, v1 +; GFX942-NEXT: v_readfirstlane_b32 s6, v2 +; GFX942-NEXT: v_readfirstlane_b32 s7, v3 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] +; GFX942-NEXT: s_and_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: buffer_load_dword v7, v9, s[4:7], 0 offen +; GFX942-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB18_1 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: s_mov_b64 exec, s[2:3] +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v11, 16, v5 +; GFX942-NEXT: s_movk_i32 s10, 0x7fff +; GFX942-NEXT: .LBB18_3: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Loop Header: Depth=1 +; GFX942-NEXT: ; Child Loop BB18_4 Depth 2 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v4, v8, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_mov_b64 s[8:9], exec +; GFX942-NEXT: v_add_f32_e32 v4, v4, v11 +; GFX942-NEXT: v_bfe_u32 v5, v4, 16, 1 +; GFX942-NEXT: v_add3_u32 v5, v5, v4, s10 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v6, v7, v10, v4 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7] +; GFX942-NEXT: .LBB18_4: ; Parent Loop BB18_3 Depth=1 +; GFX942-NEXT: ; => This Inner Loop Header: Depth=2 +; GFX942-NEXT: v_readfirstlane_b32 s4, v0 +; GFX942-NEXT: v_readfirstlane_b32 s5, v1 +; GFX942-NEXT: v_readfirstlane_b32 s6, v2 +; GFX942-NEXT: v_readfirstlane_b32 s7, v3 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] +; GFX942-NEXT: s_and_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_atomic_cmpswap v[4:5], v9, s[4:7], 0 offen sc0 +; GFX942-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB18_4 +; GFX942-NEXT: ; %bb.5: ; in Loop: Header=BB18_3 Depth=1 +; GFX942-NEXT: s_mov_b64 exec, s[8:9] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v7, v4 +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB18_3 +; GFX942-NEXT: ; %bb.6: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v8, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -6256,15 +6256,15 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__amdgpu_no ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, s16 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], 0 offen offset:1024 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, s16 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], 0 offen offset:1024 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -6505,15 +6505,15 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset__amdgpu_no_fin ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, s16 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], 0 offen offset:1024 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, s16 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], 0 offen offset:1024 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -6756,33 +6756,33 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__waterfall ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__waterfall__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[2:3], exec -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: .LBB21_1: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: v_readfirstlane_b32 s4, v0 -; GFX940-NEXT: v_readfirstlane_b32 s5, v1 -; GFX940-NEXT: v_readfirstlane_b32 s6, v2 -; GFX940-NEXT: v_readfirstlane_b32 s7, v3 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] -; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_atomic_pk_add_f16 v5, v4, s[4:7], 0 offen offset:1024 sc0 -; GFX940-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 -; GFX940-NEXT: ; implicit-def: $vgpr4 -; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB21_1 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: s_mov_b64 exec, s[2:3] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__waterfall__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[2:3], exec +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: .LBB21_1: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: v_readfirstlane_b32 s4, v0 +; GFX942-NEXT: v_readfirstlane_b32 s5, v1 +; GFX942-NEXT: v_readfirstlane_b32 s6, v2 +; GFX942-NEXT: v_readfirstlane_b32 s7, v3 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] +; GFX942-NEXT: s_and_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_atomic_pk_add_f16 v5, v4, s[4:7], 0 offen offset:1024 sc0 +; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 +; GFX942-NEXT: ; implicit-def: $vgpr4 +; GFX942-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB21_1 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: s_mov_b64 exec, s[2:3] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__waterfall__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -7235,15 +7235,15 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset(ptr addrsp ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, s16 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], 0 offen offset:1024 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, s16 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], 0 offen offset:1024 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset: ; GFX11: ; %bb.0: @@ -7501,15 +7501,15 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset(ptr addrspace( ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, s16 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], 0 offen offset:1024 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, s16 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], 0 offen offset:1024 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset: ; GFX11: ; %bb.0: @@ -7764,15 +7764,15 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__amdgpu_no ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, s16 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], 0 offen offset:1024 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, s16 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], 0 offen offset:1024 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -8030,15 +8030,15 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset__amdgpu_no_rem ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, s16 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], 0 offen offset:1024 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, s16 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], 0 offen offset:1024 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2f16__offset__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -8297,50 +8297,50 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__amdgpu ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v0, s16 -; GFX940-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024 -; GFX940-NEXT: s_add_i32 s4, s16, 0x400 -; GFX940-NEXT: s_mov_b64 s[6:7], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 -; GFX940-NEXT: s_movk_i32 s8, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v3, 0xffff0000, v1 -; GFX940-NEXT: s_mov_b32 s9, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, s4 -; GFX940-NEXT: .LBB26_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v7 -; GFX940-NEXT: v_and_b32_e32 v1, 0xffff0000, v7 -; GFX940-NEXT: v_add_f32_e32 v0, v0, v2 -; GFX940-NEXT: v_add_f32_e32 v1, v1, v3 -; GFX940-NEXT: v_bfe_u32 v5, v0, 16, 1 -; GFX940-NEXT: v_bfe_u32 v8, v1, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v0 -; GFX940-NEXT: v_or_b32_e32 v9, 0x400000, v1 -; GFX940-NEXT: v_add3_u32 v5, v5, v0, s8 -; GFX940-NEXT: v_add3_u32 v8, v8, v1, s8 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v1, v1 -; GFX940-NEXT: v_cmp_u_f32_e64 s[4:5], v0, v0 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v8, v9, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v0, v5, v6, s[4:5] -; GFX940-NEXT: v_perm_b32 v6, v1, v0, s9 -; GFX940-NEXT: v_mov_b64_e32 v[0:1], v[6:7] -; GFX940-NEXT: buffer_atomic_cmpswap v[0:1], v4, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v0, v7 -; GFX940-NEXT: s_or_b64 s[6:7], vcc, s[6:7] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[6:7] -; GFX940-NEXT: s_cbranch_execnz .LBB26_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v0, s16 +; GFX942-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024 +; GFX942-NEXT: s_add_i32 s4, s16, 0x400 +; GFX942-NEXT: s_mov_b64 s[6:7], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1 +; GFX942-NEXT: s_movk_i32 s8, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v3, 0xffff0000, v1 +; GFX942-NEXT: s_mov_b32 s9, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, s4 +; GFX942-NEXT: .LBB26_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v7 +; GFX942-NEXT: v_and_b32_e32 v1, 0xffff0000, v7 +; GFX942-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX942-NEXT: v_add_f32_e32 v1, v1, v3 +; GFX942-NEXT: v_bfe_u32 v5, v0, 16, 1 +; GFX942-NEXT: v_bfe_u32 v8, v1, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v0 +; GFX942-NEXT: v_or_b32_e32 v9, 0x400000, v1 +; GFX942-NEXT: v_add3_u32 v5, v5, v0, s8 +; GFX942-NEXT: v_add3_u32 v8, v8, v1, s8 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v1, v1 +; GFX942-NEXT: v_cmp_u_f32_e64 s[4:5], v0, v0 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v8, v9, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v0, v5, v6, s[4:5] +; GFX942-NEXT: v_perm_b32 v6, v1, v0, s9 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7] +; GFX942-NEXT: buffer_atomic_cmpswap v[0:1], v4, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v0, v7 +; GFX942-NEXT: s_or_b64 s[6:7], vcc, s[6:7] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[6:7] +; GFX942-NEXT: s_cbranch_execnz .LBB26_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[6:7] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -8680,49 +8680,49 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset__amdgpu_no_fi ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, s16 -; GFX940-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024 -; GFX940-NEXT: s_add_i32 s4, s16, 0x400 -; GFX940-NEXT: s_mov_b64 s[6:7], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v0 -; GFX940-NEXT: s_movk_i32 s8, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v3, 0xffff0000, v0 -; GFX940-NEXT: s_mov_b32 s9, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, s4 -; GFX940-NEXT: .LBB27_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v1 -; GFX940-NEXT: v_add_f32_e32 v0, v0, v2 -; GFX940-NEXT: v_add_f32_e32 v5, v5, v3 -; GFX940-NEXT: v_bfe_u32 v6, v0, 16, 1 -; GFX940-NEXT: v_bfe_u32 v8, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v7, 0x400000, v0 -; GFX940-NEXT: v_or_b32_e32 v9, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v0, s8 -; GFX940-NEXT: v_add3_u32 v8, v8, v5, s8 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[4:5], v0, v0 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v0, v6, v7, s[4:5] -; GFX940-NEXT: v_perm_b32 v0, v5, v0, s9 -; GFX940-NEXT: v_mov_b64_e32 v[6:7], v[0:1] -; GFX940-NEXT: buffer_atomic_cmpswap v[6:7], v4, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v6, v1 -; GFX940-NEXT: s_or_b64 s[6:7], vcc, s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[6:7] -; GFX940-NEXT: s_cbranch_execnz .LBB27_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, s16 +; GFX942-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024 +; GFX942-NEXT: s_add_i32 s4, s16, 0x400 +; GFX942-NEXT: s_mov_b64 s[6:7], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v0 +; GFX942-NEXT: s_movk_i32 s8, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v3, 0xffff0000, v0 +; GFX942-NEXT: s_mov_b32 s9, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, s4 +; GFX942-NEXT: .LBB27_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v1 +; GFX942-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX942-NEXT: v_add_f32_e32 v5, v5, v3 +; GFX942-NEXT: v_bfe_u32 v6, v0, 16, 1 +; GFX942-NEXT: v_bfe_u32 v8, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v7, 0x400000, v0 +; GFX942-NEXT: v_or_b32_e32 v9, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v0, s8 +; GFX942-NEXT: v_add3_u32 v8, v8, v5, s8 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[4:5], v0, v0 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v0, v6, v7, s[4:5] +; GFX942-NEXT: v_perm_b32 v0, v5, v0, s9 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1] +; GFX942-NEXT: buffer_atomic_cmpswap v[6:7], v4, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v6, v1 +; GFX942-NEXT: s_or_b64 s[6:7], vcc, s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[6:7] +; GFX942-NEXT: s_cbranch_execnz .LBB27_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[6:7] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -9075,83 +9075,83 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__waterf ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__waterfall__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_u32_e32 v8, 0x400, v4 -; GFX940-NEXT: s_mov_b64 s[2:3], exec -; GFX940-NEXT: .LBB28_1: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: v_readfirstlane_b32 s4, v0 -; GFX940-NEXT: v_readfirstlane_b32 s5, v1 -; GFX940-NEXT: v_readfirstlane_b32 s6, v2 -; GFX940-NEXT: v_readfirstlane_b32 s7, v3 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] -; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: buffer_load_dword v7, v4, s[4:7], 0 offen offset:1024 -; GFX940-NEXT: ; implicit-def: $vgpr4 -; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB28_1 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: s_mov_b64 exec, s[2:3] -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v9, 16, v5 -; GFX940-NEXT: s_movk_i32 s10, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v10, 0xffff0000, v5 -; GFX940-NEXT: s_mov_b32 s11, 0x7060302 -; GFX940-NEXT: .LBB28_3: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Loop Header: Depth=1 -; GFX940-NEXT: ; Child Loop BB28_4 Depth 2 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v7 -; GFX940-NEXT: v_add_f32_e32 v4, v4, v9 -; GFX940-NEXT: v_bfe_u32 v5, v4, 16, 1 -; GFX940-NEXT: v_add3_u32 v5, v5, v4, s10 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 -; GFX940-NEXT: s_mov_b64 s[8:9], exec -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 -; GFX940-NEXT: v_add_f32_e32 v5, v5, v10 -; GFX940-NEXT: v_bfe_u32 v6, v5, 16, 1 -; GFX940-NEXT: v_add3_u32 v6, v6, v5, s10 -; GFX940-NEXT: v_or_b32_e32 v11, 0x400000, v5 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v11, vcc -; GFX940-NEXT: v_perm_b32 v6, v5, v4, s11 -; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[6:7] -; GFX940-NEXT: .LBB28_4: ; Parent Loop BB28_3 Depth=1 -; GFX940-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX940-NEXT: v_readfirstlane_b32 s4, v0 -; GFX940-NEXT: v_readfirstlane_b32 s5, v1 -; GFX940-NEXT: v_readfirstlane_b32 s6, v2 -; GFX940-NEXT: v_readfirstlane_b32 s7, v3 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] -; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[4:7], 0 offen sc0 -; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB28_4 -; GFX940-NEXT: ; %bb.5: ; in Loop: Header=BB28_3 Depth=1 -; GFX940-NEXT: s_mov_b64 exec, s[8:9] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v7, v4 -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB28_3 -; GFX940-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__waterfall__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_u32_e32 v8, 0x400, v4 +; GFX942-NEXT: s_mov_b64 s[2:3], exec +; GFX942-NEXT: .LBB28_1: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: v_readfirstlane_b32 s4, v0 +; GFX942-NEXT: v_readfirstlane_b32 s5, v1 +; GFX942-NEXT: v_readfirstlane_b32 s6, v2 +; GFX942-NEXT: v_readfirstlane_b32 s7, v3 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] +; GFX942-NEXT: s_and_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: buffer_load_dword v7, v4, s[4:7], 0 offen offset:1024 +; GFX942-NEXT: ; implicit-def: $vgpr4 +; GFX942-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB28_1 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: s_mov_b64 exec, s[2:3] +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v9, 16, v5 +; GFX942-NEXT: s_movk_i32 s10, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v10, 0xffff0000, v5 +; GFX942-NEXT: s_mov_b32 s11, 0x7060302 +; GFX942-NEXT: .LBB28_3: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Loop Header: Depth=1 +; GFX942-NEXT: ; Child Loop BB28_4 Depth 2 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v7 +; GFX942-NEXT: v_add_f32_e32 v4, v4, v9 +; GFX942-NEXT: v_bfe_u32 v5, v4, 16, 1 +; GFX942-NEXT: v_add3_u32 v5, v5, v4, s10 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 +; GFX942-NEXT: s_mov_b64 s[8:9], exec +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 +; GFX942-NEXT: v_add_f32_e32 v5, v5, v10 +; GFX942-NEXT: v_bfe_u32 v6, v5, 16, 1 +; GFX942-NEXT: v_add3_u32 v6, v6, v5, s10 +; GFX942-NEXT: v_or_b32_e32 v11, 0x400000, v5 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v11, vcc +; GFX942-NEXT: v_perm_b32 v6, v5, v4, s11 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7] +; GFX942-NEXT: .LBB28_4: ; Parent Loop BB28_3 Depth=1 +; GFX942-NEXT: ; => This Inner Loop Header: Depth=2 +; GFX942-NEXT: v_readfirstlane_b32 s4, v0 +; GFX942-NEXT: v_readfirstlane_b32 s5, v1 +; GFX942-NEXT: v_readfirstlane_b32 s6, v2 +; GFX942-NEXT: v_readfirstlane_b32 s7, v3 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] +; GFX942-NEXT: s_and_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[4:7], 0 offen sc0 +; GFX942-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB28_4 +; GFX942-NEXT: ; %bb.5: ; in Loop: Header=BB28_3 Depth=1 +; GFX942-NEXT: s_mov_b64 exec, s[8:9] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v7, v4 +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB28_3 +; GFX942-NEXT: ; %bb.6: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__waterfall__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -9713,50 +9713,50 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset(ptr add ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v0, s16 -; GFX940-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024 -; GFX940-NEXT: s_add_i32 s4, s16, 0x400 -; GFX940-NEXT: s_mov_b64 s[6:7], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 -; GFX940-NEXT: s_movk_i32 s8, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v3, 0xffff0000, v1 -; GFX940-NEXT: s_mov_b32 s9, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, s4 -; GFX940-NEXT: .LBB29_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v7 -; GFX940-NEXT: v_and_b32_e32 v1, 0xffff0000, v7 -; GFX940-NEXT: v_add_f32_e32 v0, v0, v2 -; GFX940-NEXT: v_add_f32_e32 v1, v1, v3 -; GFX940-NEXT: v_bfe_u32 v5, v0, 16, 1 -; GFX940-NEXT: v_bfe_u32 v8, v1, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v0 -; GFX940-NEXT: v_or_b32_e32 v9, 0x400000, v1 -; GFX940-NEXT: v_add3_u32 v5, v5, v0, s8 -; GFX940-NEXT: v_add3_u32 v8, v8, v1, s8 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v1, v1 -; GFX940-NEXT: v_cmp_u_f32_e64 s[4:5], v0, v0 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v8, v9, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v0, v5, v6, s[4:5] -; GFX940-NEXT: v_perm_b32 v6, v1, v0, s9 -; GFX940-NEXT: v_mov_b64_e32 v[0:1], v[6:7] -; GFX940-NEXT: buffer_atomic_cmpswap v[0:1], v4, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v0, v7 -; GFX940-NEXT: s_or_b64 s[6:7], vcc, s[6:7] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[6:7] -; GFX940-NEXT: s_cbranch_execnz .LBB29_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v0, s16 +; GFX942-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024 +; GFX942-NEXT: s_add_i32 s4, s16, 0x400 +; GFX942-NEXT: s_mov_b64 s[6:7], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1 +; GFX942-NEXT: s_movk_i32 s8, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v3, 0xffff0000, v1 +; GFX942-NEXT: s_mov_b32 s9, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, s4 +; GFX942-NEXT: .LBB29_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v7 +; GFX942-NEXT: v_and_b32_e32 v1, 0xffff0000, v7 +; GFX942-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX942-NEXT: v_add_f32_e32 v1, v1, v3 +; GFX942-NEXT: v_bfe_u32 v5, v0, 16, 1 +; GFX942-NEXT: v_bfe_u32 v8, v1, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v0 +; GFX942-NEXT: v_or_b32_e32 v9, 0x400000, v1 +; GFX942-NEXT: v_add3_u32 v5, v5, v0, s8 +; GFX942-NEXT: v_add3_u32 v8, v8, v1, s8 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v1, v1 +; GFX942-NEXT: v_cmp_u_f32_e64 s[4:5], v0, v0 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v8, v9, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v0, v5, v6, s[4:5] +; GFX942-NEXT: v_perm_b32 v6, v1, v0, s9 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7] +; GFX942-NEXT: buffer_atomic_cmpswap v[0:1], v4, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v0, v7 +; GFX942-NEXT: s_or_b64 s[6:7], vcc, s[6:7] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[6:7] +; GFX942-NEXT: s_cbranch_execnz .LBB29_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[6:7] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset: ; GFX11: ; %bb.0: @@ -10096,49 +10096,49 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset(ptr addrspace ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, s16 -; GFX940-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024 -; GFX940-NEXT: s_add_i32 s4, s16, 0x400 -; GFX940-NEXT: s_mov_b64 s[6:7], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v0 -; GFX940-NEXT: s_movk_i32 s8, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v3, 0xffff0000, v0 -; GFX940-NEXT: s_mov_b32 s9, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, s4 -; GFX940-NEXT: .LBB30_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v1 -; GFX940-NEXT: v_add_f32_e32 v0, v0, v2 -; GFX940-NEXT: v_add_f32_e32 v5, v5, v3 -; GFX940-NEXT: v_bfe_u32 v6, v0, 16, 1 -; GFX940-NEXT: v_bfe_u32 v8, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v7, 0x400000, v0 -; GFX940-NEXT: v_or_b32_e32 v9, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v0, s8 -; GFX940-NEXT: v_add3_u32 v8, v8, v5, s8 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[4:5], v0, v0 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v0, v6, v7, s[4:5] -; GFX940-NEXT: v_perm_b32 v0, v5, v0, s9 -; GFX940-NEXT: v_mov_b64_e32 v[6:7], v[0:1] -; GFX940-NEXT: buffer_atomic_cmpswap v[6:7], v4, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v6, v1 -; GFX940-NEXT: s_or_b64 s[6:7], vcc, s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[6:7] -; GFX940-NEXT: s_cbranch_execnz .LBB30_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, s16 +; GFX942-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024 +; GFX942-NEXT: s_add_i32 s4, s16, 0x400 +; GFX942-NEXT: s_mov_b64 s[6:7], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v0 +; GFX942-NEXT: s_movk_i32 s8, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v3, 0xffff0000, v0 +; GFX942-NEXT: s_mov_b32 s9, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, s4 +; GFX942-NEXT: .LBB30_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v1 +; GFX942-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX942-NEXT: v_add_f32_e32 v5, v5, v3 +; GFX942-NEXT: v_bfe_u32 v6, v0, 16, 1 +; GFX942-NEXT: v_bfe_u32 v8, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v7, 0x400000, v0 +; GFX942-NEXT: v_or_b32_e32 v9, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v0, s8 +; GFX942-NEXT: v_add3_u32 v8, v8, v5, s8 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[4:5], v0, v0 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v0, v6, v7, s[4:5] +; GFX942-NEXT: v_perm_b32 v0, v5, v0, s9 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1] +; GFX942-NEXT: buffer_atomic_cmpswap v[6:7], v4, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v6, v1 +; GFX942-NEXT: s_or_b64 s[6:7], vcc, s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[6:7] +; GFX942-NEXT: s_cbranch_execnz .LBB30_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[6:7] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset: ; GFX11: ; %bb.0: @@ -10470,50 +10470,50 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__amdgpu ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v0, s16 -; GFX940-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024 -; GFX940-NEXT: s_add_i32 s4, s16, 0x400 -; GFX940-NEXT: s_mov_b64 s[6:7], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 -; GFX940-NEXT: s_movk_i32 s8, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v3, 0xffff0000, v1 -; GFX940-NEXT: s_mov_b32 s9, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, s4 -; GFX940-NEXT: .LBB31_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v7 -; GFX940-NEXT: v_and_b32_e32 v1, 0xffff0000, v7 -; GFX940-NEXT: v_add_f32_e32 v0, v0, v2 -; GFX940-NEXT: v_add_f32_e32 v1, v1, v3 -; GFX940-NEXT: v_bfe_u32 v5, v0, 16, 1 -; GFX940-NEXT: v_bfe_u32 v8, v1, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v0 -; GFX940-NEXT: v_or_b32_e32 v9, 0x400000, v1 -; GFX940-NEXT: v_add3_u32 v5, v5, v0, s8 -; GFX940-NEXT: v_add3_u32 v8, v8, v1, s8 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v1, v1 -; GFX940-NEXT: v_cmp_u_f32_e64 s[4:5], v0, v0 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v8, v9, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v0, v5, v6, s[4:5] -; GFX940-NEXT: v_perm_b32 v6, v1, v0, s9 -; GFX940-NEXT: v_mov_b64_e32 v[0:1], v[6:7] -; GFX940-NEXT: buffer_atomic_cmpswap v[0:1], v4, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v0, v7 -; GFX940-NEXT: s_or_b64 s[6:7], vcc, s[6:7] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[6:7] -; GFX940-NEXT: s_cbranch_execnz .LBB31_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v0, s16 +; GFX942-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024 +; GFX942-NEXT: s_add_i32 s4, s16, 0x400 +; GFX942-NEXT: s_mov_b64 s[6:7], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1 +; GFX942-NEXT: s_movk_i32 s8, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v3, 0xffff0000, v1 +; GFX942-NEXT: s_mov_b32 s9, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, s4 +; GFX942-NEXT: .LBB31_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v7 +; GFX942-NEXT: v_and_b32_e32 v1, 0xffff0000, v7 +; GFX942-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX942-NEXT: v_add_f32_e32 v1, v1, v3 +; GFX942-NEXT: v_bfe_u32 v5, v0, 16, 1 +; GFX942-NEXT: v_bfe_u32 v8, v1, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v0 +; GFX942-NEXT: v_or_b32_e32 v9, 0x400000, v1 +; GFX942-NEXT: v_add3_u32 v5, v5, v0, s8 +; GFX942-NEXT: v_add3_u32 v8, v8, v1, s8 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v1, v1 +; GFX942-NEXT: v_cmp_u_f32_e64 s[4:5], v0, v0 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v8, v9, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v0, v5, v6, s[4:5] +; GFX942-NEXT: v_perm_b32 v6, v1, v0, s9 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7] +; GFX942-NEXT: buffer_atomic_cmpswap v[0:1], v4, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v0, v7 +; GFX942-NEXT: s_or_b64 s[6:7], vcc, s[6:7] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[6:7] +; GFX942-NEXT: s_cbranch_execnz .LBB31_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[6:7] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -10853,49 +10853,49 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset__amdgpu_no_re ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, s16 -; GFX940-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024 -; GFX940-NEXT: s_add_i32 s4, s16, 0x400 -; GFX940-NEXT: s_mov_b64 s[6:7], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v0 -; GFX940-NEXT: s_movk_i32 s8, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v3, 0xffff0000, v0 -; GFX940-NEXT: s_mov_b32 s9, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, s4 -; GFX940-NEXT: .LBB32_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v1 -; GFX940-NEXT: v_add_f32_e32 v0, v0, v2 -; GFX940-NEXT: v_add_f32_e32 v5, v5, v3 -; GFX940-NEXT: v_bfe_u32 v6, v0, 16, 1 -; GFX940-NEXT: v_bfe_u32 v8, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v7, 0x400000, v0 -; GFX940-NEXT: v_or_b32_e32 v9, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v0, s8 -; GFX940-NEXT: v_add3_u32 v8, v8, v5, s8 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[4:5], v0, v0 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v0, v6, v7, s[4:5] -; GFX940-NEXT: v_perm_b32 v0, v5, v0, s9 -; GFX940-NEXT: v_mov_b64_e32 v[6:7], v[0:1] -; GFX940-NEXT: buffer_atomic_cmpswap v[6:7], v4, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v6, v1 -; GFX940-NEXT: s_or_b64 s[6:7], vcc, s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[6:7] -; GFX940-NEXT: s_cbranch_execnz .LBB32_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, s16 +; GFX942-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024 +; GFX942-NEXT: s_add_i32 s4, s16, 0x400 +; GFX942-NEXT: s_mov_b64 s[6:7], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v0 +; GFX942-NEXT: s_movk_i32 s8, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v3, 0xffff0000, v0 +; GFX942-NEXT: s_mov_b32 s9, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, s4 +; GFX942-NEXT: .LBB32_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v1 +; GFX942-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX942-NEXT: v_add_f32_e32 v5, v5, v3 +; GFX942-NEXT: v_bfe_u32 v6, v0, 16, 1 +; GFX942-NEXT: v_bfe_u32 v8, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v7, 0x400000, v0 +; GFX942-NEXT: v_or_b32_e32 v9, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v0, s8 +; GFX942-NEXT: v_add3_u32 v8, v8, v5, s8 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[4:5], v0, v0 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v0, v6, v7, s[4:5] +; GFX942-NEXT: v_perm_b32 v0, v5, v0, s9 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1] +; GFX942-NEXT: buffer_atomic_cmpswap v[6:7], v4, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v6, v1 +; GFX942-NEXT: s_or_b64 s[6:7], vcc, s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[6:7] +; GFX942-NEXT: s_cbranch_execnz .LBB32_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[6:7] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -11227,49 +11227,49 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset__amdgpu_no_fi ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, s16 -; GFX940-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024 -; GFX940-NEXT: s_add_i32 s4, s16, 0x400 -; GFX940-NEXT: s_mov_b64 s[6:7], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v0 -; GFX940-NEXT: s_movk_i32 s8, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v3, 0xffff0000, v0 -; GFX940-NEXT: s_mov_b32 s9, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, s4 -; GFX940-NEXT: .LBB33_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v1 -; GFX940-NEXT: v_add_f32_e32 v0, v0, v2 -; GFX940-NEXT: v_add_f32_e32 v5, v5, v3 -; GFX940-NEXT: v_bfe_u32 v6, v0, 16, 1 -; GFX940-NEXT: v_bfe_u32 v8, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v7, 0x400000, v0 -; GFX940-NEXT: v_or_b32_e32 v9, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v0, s8 -; GFX940-NEXT: v_add3_u32 v8, v8, v5, s8 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[4:5], v0, v0 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v0, v6, v7, s[4:5] -; GFX940-NEXT: v_perm_b32 v0, v5, v0, s9 -; GFX940-NEXT: v_mov_b64_e32 v[6:7], v[0:1] -; GFX940-NEXT: buffer_atomic_cmpswap v[6:7], v4, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v6, v1 -; GFX940-NEXT: s_or_b64 s[6:7], vcc, s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[6:7] -; GFX940-NEXT: s_cbranch_execnz .LBB33_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, s16 +; GFX942-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024 +; GFX942-NEXT: s_add_i32 s4, s16, 0x400 +; GFX942-NEXT: s_mov_b64 s[6:7], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v0 +; GFX942-NEXT: s_movk_i32 s8, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v3, 0xffff0000, v0 +; GFX942-NEXT: s_mov_b32 s9, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, s4 +; GFX942-NEXT: .LBB33_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v1 +; GFX942-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX942-NEXT: v_add_f32_e32 v5, v5, v3 +; GFX942-NEXT: v_bfe_u32 v6, v0, 16, 1 +; GFX942-NEXT: v_bfe_u32 v8, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v7, 0x400000, v0 +; GFX942-NEXT: v_or_b32_e32 v9, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v0, s8 +; GFX942-NEXT: v_add3_u32 v8, v8, v5, s8 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[4:5], v0, v0 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v0, v6, v7, s[4:5] +; GFX942-NEXT: v_perm_b32 v0, v5, v0, s9 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1] +; GFX942-NEXT: buffer_atomic_cmpswap v[6:7], v4, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v6, v1 +; GFX942-NEXT: s_or_b64 s[6:7], vcc, s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[6:7] +; GFX942-NEXT: s_cbranch_execnz .LBB33_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[6:7] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -11606,15 +11606,15 @@ define float @buffer_fat_ptr_system_atomic_fadd_ret_f32__offset__amdgpu_no_fine_ ; GFX12-NEXT: global_inv scope:SCOPE_SYS ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_system_atomic_fadd_ret_f32__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, s16 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_system_atomic_fadd_ret_f32__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, s16 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], 0 offen offset:1024 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_system_atomic_fadd_ret_f32__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmax.ll b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmax.ll index 7f06d169a6b13..38adf60888eca 100644 --- a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmax.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmax.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s -; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s @@ -28,34 +28,34 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__amdgpu_no_fine_g ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v0, s16 -; GFX940-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024 -; GFX940-NEXT: s_add_i32 s6, s16, 0x400 -; GFX940-NEXT: s_mov_b64 s[4:5], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v1, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, s6 -; GFX940-NEXT: .LBB0_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_max_f32_e32 v0, v5, v5 -; GFX940-NEXT: v_max_f32_e32 v4, v0, v2 -; GFX940-NEXT: v_mov_b64_e32 v[0:1], v[4:5] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 -; GFX940-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_cbranch_execnz .LBB0_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v0, s16 +; GFX942-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024 +; GFX942-NEXT: s_add_i32 s6, s16, 0x400 +; GFX942-NEXT: s_mov_b64 s[4:5], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v1, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, s6 +; GFX942-NEXT: .LBB0_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_max_f32_e32 v0, v5, v5 +; GFX942-NEXT: v_max_f32_e32 v4, v0, v2 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 +; GFX942-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_cbranch_execnz .LBB0_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -203,33 +203,33 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__offset__amdgpu_no_fine_ ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f32__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, s16 -; GFX940-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024 -; GFX940-NEXT: s_add_i32 s6, s16, 0x400 -; GFX940-NEXT: s_mov_b64 s[4:5], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v0, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, s6 -; GFX940-NEXT: .LBB1_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v0, v1, v1 -; GFX940-NEXT: v_max_f32_e32 v0, v0, v2 -; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[0:1] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v3, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1 -; GFX940-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_cbranch_execnz .LBB1_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f32__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, s16 +; GFX942-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024 +; GFX942-NEXT: s_add_i32 s6, s16, 0x400 +; GFX942-NEXT: s_mov_b64 s[4:5], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v0, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, s6 +; GFX942-NEXT: .LBB1_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v0, v1, v1 +; GFX942-NEXT: v_max_f32_e32 v0, v0, v2 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_cmpswap v[4:5], v3, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1 +; GFX942-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_cbranch_execnz .LBB1_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f32__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -395,66 +395,66 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__waterfall__amdgp ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__waterfall__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_u32_e32 v8, 0x400, v4 -; GFX940-NEXT: s_mov_b64 s[2:3], exec -; GFX940-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: v_readfirstlane_b32 s4, v0 -; GFX940-NEXT: v_readfirstlane_b32 s5, v1 -; GFX940-NEXT: v_readfirstlane_b32 s6, v2 -; GFX940-NEXT: v_readfirstlane_b32 s7, v3 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] -; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: buffer_load_dword v7, v4, s[4:7], 0 offen offset:1024 -; GFX940-NEXT: ; implicit-def: $vgpr4 -; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB2_1 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: s_mov_b64 exec, s[2:3] -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_max_f32_e32 v9, v5, v5 -; GFX940-NEXT: .LBB2_3: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Loop Header: Depth=1 -; GFX940-NEXT: ; Child Loop BB2_4 Depth 2 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v4, v7, v7 -; GFX940-NEXT: v_max_f32_e32 v6, v4, v9 -; GFX940-NEXT: s_mov_b64 s[8:9], exec -; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[6:7] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: .LBB2_4: ; Parent Loop BB2_3 Depth=1 -; GFX940-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX940-NEXT: v_readfirstlane_b32 s4, v0 -; GFX940-NEXT: v_readfirstlane_b32 s5, v1 -; GFX940-NEXT: v_readfirstlane_b32 s6, v2 -; GFX940-NEXT: v_readfirstlane_b32 s7, v3 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] -; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[4:7], 0 offen sc0 -; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB2_4 -; GFX940-NEXT: ; %bb.5: ; in Loop: Header=BB2_3 Depth=1 -; GFX940-NEXT: s_mov_b64 exec, s[8:9] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v7, v4 -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB2_3 -; GFX940-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__waterfall__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_u32_e32 v8, 0x400, v4 +; GFX942-NEXT: s_mov_b64 s[2:3], exec +; GFX942-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: v_readfirstlane_b32 s4, v0 +; GFX942-NEXT: v_readfirstlane_b32 s5, v1 +; GFX942-NEXT: v_readfirstlane_b32 s6, v2 +; GFX942-NEXT: v_readfirstlane_b32 s7, v3 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] +; GFX942-NEXT: s_and_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: buffer_load_dword v7, v4, s[4:7], 0 offen offset:1024 +; GFX942-NEXT: ; implicit-def: $vgpr4 +; GFX942-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB2_1 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: s_mov_b64 exec, s[2:3] +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_max_f32_e32 v9, v5, v5 +; GFX942-NEXT: .LBB2_3: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Loop Header: Depth=1 +; GFX942-NEXT: ; Child Loop BB2_4 Depth 2 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v4, v7, v7 +; GFX942-NEXT: v_max_f32_e32 v6, v4, v9 +; GFX942-NEXT: s_mov_b64 s[8:9], exec +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: .LBB2_4: ; Parent Loop BB2_3 Depth=1 +; GFX942-NEXT: ; => This Inner Loop Header: Depth=2 +; GFX942-NEXT: v_readfirstlane_b32 s4, v0 +; GFX942-NEXT: v_readfirstlane_b32 s5, v1 +; GFX942-NEXT: v_readfirstlane_b32 s6, v2 +; GFX942-NEXT: v_readfirstlane_b32 s7, v3 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] +; GFX942-NEXT: s_and_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[4:7], 0 offen sc0 +; GFX942-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB2_4 +; GFX942-NEXT: ; %bb.5: ; in Loop: Header=BB2_3 Depth=1 +; GFX942-NEXT: s_mov_b64 exec, s[8:9] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v7, v4 +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB2_3 +; GFX942-NEXT: ; %bb.6: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__waterfall__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -766,34 +766,34 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__amdgpu_no_remote ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v0, s16 -; GFX940-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024 -; GFX940-NEXT: s_add_i32 s6, s16, 0x400 -; GFX940-NEXT: s_mov_b64 s[4:5], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v1, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, s6 -; GFX940-NEXT: .LBB3_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_max_f32_e32 v0, v5, v5 -; GFX940-NEXT: v_max_f32_e32 v4, v0, v2 -; GFX940-NEXT: v_mov_b64_e32 v[0:1], v[4:5] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 -; GFX940-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_cbranch_execnz .LBB3_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v0, s16 +; GFX942-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024 +; GFX942-NEXT: s_add_i32 s6, s16, 0x400 +; GFX942-NEXT: s_mov_b64 s[4:5], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v1, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, s6 +; GFX942-NEXT: .LBB3_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_max_f32_e32 v0, v5, v5 +; GFX942-NEXT: v_max_f32_e32 v4, v0, v2 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 +; GFX942-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_cbranch_execnz .LBB3_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -1023,34 +1023,34 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__amdgpu_no_fine_g ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v0, s16 -; GFX940-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024 -; GFX940-NEXT: s_add_i32 s6, s16, 0x400 -; GFX940-NEXT: s_mov_b64 s[4:5], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v1, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, s6 -; GFX940-NEXT: .LBB4_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_max_f32_e32 v0, v5, v5 -; GFX940-NEXT: v_max_f32_e32 v4, v0, v2 -; GFX940-NEXT: v_mov_b64_e32 v[0:1], v[4:5] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 -; GFX940-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_cbranch_execnz .LBB4_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v0, s16 +; GFX942-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024 +; GFX942-NEXT: s_add_i32 s6, s16, 0x400 +; GFX942-NEXT: s_mov_b64 s[4:5], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v1, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, s6 +; GFX942-NEXT: .LBB4_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_max_f32_e32 v0, v5, v5 +; GFX942-NEXT: v_max_f32_e32 v4, v0, v2 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 +; GFX942-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_cbranch_execnz .LBB4_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -1226,15 +1226,15 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__amdgpu_no_fine_ ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, s16 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, s16 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -1417,15 +1417,15 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f64__offset__amdgpu_no_fine_ ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f64__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, s16 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f64__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, s16 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f64__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -1643,36 +1643,36 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__waterfall__amdg ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__waterfall__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v6 -; GFX940-NEXT: v_mov_b32_e32 v6, v5 -; GFX940-NEXT: s_mov_b64 s[2:3], exec -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: v_readfirstlane_b32 s4, v0 -; GFX940-NEXT: v_readfirstlane_b32 s5, v1 -; GFX940-NEXT: v_readfirstlane_b32 s6, v2 -; GFX940-NEXT: v_readfirstlane_b32 s7, v3 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] -; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_atomic_max_f64 v[6:7], v4, s[4:7], 0 offen offset:2048 sc0 -; GFX940-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 -; GFX940-NEXT: ; implicit-def: $vgpr4 -; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB7_1 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: s_mov_b64 exec, s[2:3] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__waterfall__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v6 +; GFX942-NEXT: v_mov_b32_e32 v6, v5 +; GFX942-NEXT: s_mov_b64 s[2:3], exec +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: v_readfirstlane_b32 s4, v0 +; GFX942-NEXT: v_readfirstlane_b32 s5, v1 +; GFX942-NEXT: v_readfirstlane_b32 s6, v2 +; GFX942-NEXT: v_readfirstlane_b32 s7, v3 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] +; GFX942-NEXT: s_and_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_atomic_max_f64 v[6:7], v4, s[4:7], 0 offen offset:2048 sc0 +; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 +; GFX942-NEXT: ; implicit-def: $vgpr4 +; GFX942-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB7_1 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: s_mov_b64 exec, s[2:3] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__waterfall__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -2033,15 +2033,15 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__amdgpu_no_remot ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, s16 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, s16 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -2320,15 +2320,15 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__amdgpu_no_fine_ ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, s16 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, s16 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -2529,41 +2529,41 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__amdgpu_no_fine_gr ; GFX12-NEXT: v_lshrrev_b32_e32 v0, s4, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_addk_i32 s16, 0x200 -; GFX940-NEXT: s_and_b32 s4, s16, -4 -; GFX940-NEXT: v_mov_b32_e32 v4, s4 -; GFX940-NEXT: buffer_load_dword v1, v4, s[0:3], 0 offen -; GFX940-NEXT: s_and_b32 s4, s16, 3 -; GFX940-NEXT: s_lshl_b32 s6, s4, 3 -; GFX940-NEXT: s_lshl_b32 s4, 0xffff, s6 -; GFX940-NEXT: s_not_b32 s7, s4 -; GFX940-NEXT: s_mov_b64 s[4:5], 0 -; GFX940-NEXT: v_max_f16_e32 v5, v0, v0 -; GFX940-NEXT: .LBB10_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v0, s6, v1 -; GFX940-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX940-NEXT: v_max_f16_e32 v0, v0, v5 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, s6, v0 -; GFX940-NEXT: v_and_or_b32 v0, v1, s7, v0 -; GFX940-NEXT: v_mov_b64_e32 v[2:3], v[0:1] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_cmpswap v[2:3], v4, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 -; GFX940-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_cbranch_execnz .LBB10_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, s6, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_addk_i32 s16, 0x200 +; GFX942-NEXT: s_and_b32 s4, s16, -4 +; GFX942-NEXT: v_mov_b32_e32 v4, s4 +; GFX942-NEXT: buffer_load_dword v1, v4, s[0:3], 0 offen +; GFX942-NEXT: s_and_b32 s4, s16, 3 +; GFX942-NEXT: s_lshl_b32 s6, s4, 3 +; GFX942-NEXT: s_lshl_b32 s4, 0xffff, s6 +; GFX942-NEXT: s_not_b32 s7, s4 +; GFX942-NEXT: s_mov_b64 s[4:5], 0 +; GFX942-NEXT: v_max_f16_e32 v5, v0, v0 +; GFX942-NEXT: .LBB10_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v0, s6, v1 +; GFX942-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX942-NEXT: v_max_f16_e32 v0, v0, v5 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, s6, v0 +; GFX942-NEXT: v_and_or_b32 v0, v1, s7, v0 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_cmpswap v[2:3], v4, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 +; GFX942-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_cbranch_execnz .LBB10_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, s6, v2 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -2892,40 +2892,40 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f16__offset__amdgpu_no_fine_ ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f16__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_addk_i32 s16, 0x200 -; GFX940-NEXT: s_and_b32 s4, s16, -4 -; GFX940-NEXT: v_mov_b32_e32 v2, s4 -; GFX940-NEXT: buffer_load_dword v1, v2, s[0:3], 0 offen -; GFX940-NEXT: s_and_b32 s4, s16, 3 -; GFX940-NEXT: s_lshl_b32 s6, s4, 3 -; GFX940-NEXT: s_lshl_b32 s4, 0xffff, s6 -; GFX940-NEXT: s_not_b32 s7, s4 -; GFX940-NEXT: s_mov_b64 s[4:5], 0 -; GFX940-NEXT: v_max_f16_e32 v3, v0, v0 -; GFX940-NEXT: .LBB11_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v0, s6, v1 -; GFX940-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX940-NEXT: v_max_f16_e32 v0, v0, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, s6, v0 -; GFX940-NEXT: v_and_or_b32 v0, v1, s7, v0 -; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[0:1] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1 -; GFX940-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_cbranch_execnz .LBB11_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f16__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_addk_i32 s16, 0x200 +; GFX942-NEXT: s_and_b32 s4, s16, -4 +; GFX942-NEXT: v_mov_b32_e32 v2, s4 +; GFX942-NEXT: buffer_load_dword v1, v2, s[0:3], 0 offen +; GFX942-NEXT: s_and_b32 s4, s16, 3 +; GFX942-NEXT: s_lshl_b32 s6, s4, 3 +; GFX942-NEXT: s_lshl_b32 s4, 0xffff, s6 +; GFX942-NEXT: s_not_b32 s7, s4 +; GFX942-NEXT: s_mov_b64 s[4:5], 0 +; GFX942-NEXT: v_max_f16_e32 v3, v0, v0 +; GFX942-NEXT: .LBB11_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v0, s6, v1 +; GFX942-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX942-NEXT: v_max_f16_e32 v0, v0, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, s6, v0 +; GFX942-NEXT: v_and_or_b32 v0, v1, s7, v0 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1 +; GFX942-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_cbranch_execnz .LBB11_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f16__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -3283,74 +3283,74 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall__amdgpu ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_u32_e32 v4, 0x200, v4 -; GFX940-NEXT: v_and_b32_e32 v9, -4, v4 -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v8, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v8, s0 -; GFX940-NEXT: v_not_b32_e32 v10, v4 -; GFX940-NEXT: s_mov_b64 s[2:3], exec -; GFX940-NEXT: .LBB12_1: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: v_readfirstlane_b32 s4, v0 -; GFX940-NEXT: v_readfirstlane_b32 s5, v1 -; GFX940-NEXT: v_readfirstlane_b32 s6, v2 -; GFX940-NEXT: v_readfirstlane_b32 s7, v3 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] -; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: buffer_load_dword v7, v9, s[4:7], 0 offen -; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB12_1 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: s_mov_b64 exec, s[2:3] -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_max_f16_e32 v11, v5, v5 -; GFX940-NEXT: .LBB12_3: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Loop Header: Depth=1 -; GFX940-NEXT: ; Child Loop BB12_4 Depth 2 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v8, v7 -; GFX940-NEXT: v_max_f16_e32 v4, v4, v4 -; GFX940-NEXT: v_max_f16_e32 v4, v4, v11 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v8, v4 -; GFX940-NEXT: v_and_or_b32 v6, v7, v10, v4 -; GFX940-NEXT: s_mov_b64 s[8:9], exec -; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[6:7] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: .LBB12_4: ; Parent Loop BB12_3 Depth=1 -; GFX940-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX940-NEXT: v_readfirstlane_b32 s4, v0 -; GFX940-NEXT: v_readfirstlane_b32 s5, v1 -; GFX940-NEXT: v_readfirstlane_b32 s6, v2 -; GFX940-NEXT: v_readfirstlane_b32 s7, v3 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] -; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v9, s[4:7], 0 offen sc0 -; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB12_4 -; GFX940-NEXT: ; %bb.5: ; in Loop: Header=BB12_3 Depth=1 -; GFX940-NEXT: s_mov_b64 exec, s[8:9] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v7, v4 -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB12_3 -; GFX940-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v8, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_u32_e32 v4, 0x200, v4 +; GFX942-NEXT: v_and_b32_e32 v9, -4, v4 +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v8, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v8, s0 +; GFX942-NEXT: v_not_b32_e32 v10, v4 +; GFX942-NEXT: s_mov_b64 s[2:3], exec +; GFX942-NEXT: .LBB12_1: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: v_readfirstlane_b32 s4, v0 +; GFX942-NEXT: v_readfirstlane_b32 s5, v1 +; GFX942-NEXT: v_readfirstlane_b32 s6, v2 +; GFX942-NEXT: v_readfirstlane_b32 s7, v3 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] +; GFX942-NEXT: s_and_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: buffer_load_dword v7, v9, s[4:7], 0 offen +; GFX942-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB12_1 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: s_mov_b64 exec, s[2:3] +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_max_f16_e32 v11, v5, v5 +; GFX942-NEXT: .LBB12_3: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Loop Header: Depth=1 +; GFX942-NEXT: ; Child Loop BB12_4 Depth 2 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v8, v7 +; GFX942-NEXT: v_max_f16_e32 v4, v4, v4 +; GFX942-NEXT: v_max_f16_e32 v4, v4, v11 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v8, v4 +; GFX942-NEXT: v_and_or_b32 v6, v7, v10, v4 +; GFX942-NEXT: s_mov_b64 s[8:9], exec +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: .LBB12_4: ; Parent Loop BB12_3 Depth=1 +; GFX942-NEXT: ; => This Inner Loop Header: Depth=2 +; GFX942-NEXT: v_readfirstlane_b32 s4, v0 +; GFX942-NEXT: v_readfirstlane_b32 s5, v1 +; GFX942-NEXT: v_readfirstlane_b32 s6, v2 +; GFX942-NEXT: v_readfirstlane_b32 s7, v3 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] +; GFX942-NEXT: s_and_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_atomic_cmpswap v[4:5], v9, s[4:7], 0 offen sc0 +; GFX942-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB12_4 +; GFX942-NEXT: ; %bb.5: ; in Loop: Header=BB12_3 Depth=1 +; GFX942-NEXT: s_mov_b64 exec, s[8:9] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v7, v4 +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB12_3 +; GFX942-NEXT: ; %bb.6: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v8, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -3915,47 +3915,47 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__amdgpu_no_fine ; GFX12-NEXT: v_lshrrev_b32_e32 v0, s4, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_addk_i32 s16, 0x200 -; GFX940-NEXT: s_and_b32 s4, s16, -4 -; GFX940-NEXT: v_mov_b32_e32 v4, s4 -; GFX940-NEXT: buffer_load_dword v1, v4, s[0:3], 0 offen -; GFX940-NEXT: s_and_b32 s4, s16, 3 -; GFX940-NEXT: s_lshl_b32 s6, s4, 3 -; GFX940-NEXT: s_lshl_b32 s4, 0xffff, s6 -; GFX940-NEXT: s_not_b32 s7, s4 -; GFX940-NEXT: s_mov_b64 s[4:5], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v5, 16, v0 -; GFX940-NEXT: s_movk_i32 s8, 0x7fff -; GFX940-NEXT: .LBB13_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v0, s6, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: v_max_f32_e32 v0, v0, v5 -; GFX940-NEXT: v_bfe_u32 v2, v0, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v3, 0x400000, v0 -; GFX940-NEXT: v_add3_u32 v2, v2, v0, s8 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v0, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v0, v1, s7, v0 -; GFX940-NEXT: v_mov_b64_e32 v[2:3], v[0:1] -; GFX940-NEXT: buffer_atomic_cmpswap v[2:3], v4, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 -; GFX940-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_cbranch_execnz .LBB13_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, s6, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_addk_i32 s16, 0x200 +; GFX942-NEXT: s_and_b32 s4, s16, -4 +; GFX942-NEXT: v_mov_b32_e32 v4, s4 +; GFX942-NEXT: buffer_load_dword v1, v4, s[0:3], 0 offen +; GFX942-NEXT: s_and_b32 s4, s16, 3 +; GFX942-NEXT: s_lshl_b32 s6, s4, 3 +; GFX942-NEXT: s_lshl_b32 s4, 0xffff, s6 +; GFX942-NEXT: s_not_b32 s7, s4 +; GFX942-NEXT: s_mov_b64 s[4:5], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v5, 16, v0 +; GFX942-NEXT: s_movk_i32 s8, 0x7fff +; GFX942-NEXT: .LBB13_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v0, s6, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: v_max_f32_e32 v0, v0, v5 +; GFX942-NEXT: v_bfe_u32 v2, v0, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v3, 0x400000, v0 +; GFX942-NEXT: v_add3_u32 v2, v2, v0, s8 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v0, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v0, v1, s7, v0 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1] +; GFX942-NEXT: buffer_atomic_cmpswap v[2:3], v4, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 +; GFX942-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_cbranch_execnz .LBB13_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, s6, v2 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -4321,46 +4321,46 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_bf16__offset__amdgpu_no_fine ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_bf16__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_addk_i32 s16, 0x200 -; GFX940-NEXT: s_and_b32 s4, s16, -4 -; GFX940-NEXT: v_mov_b32_e32 v2, s4 -; GFX940-NEXT: buffer_load_dword v1, v2, s[0:3], 0 offen -; GFX940-NEXT: s_and_b32 s4, s16, 3 -; GFX940-NEXT: s_lshl_b32 s6, s4, 3 -; GFX940-NEXT: s_lshl_b32 s4, 0xffff, s6 -; GFX940-NEXT: s_not_b32 s7, s4 -; GFX940-NEXT: s_mov_b64 s[4:5], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX940-NEXT: s_movk_i32 s8, 0x7fff -; GFX940-NEXT: .LBB14_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v0, s6, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: v_max_f32_e32 v0, v0, v3 -; GFX940-NEXT: v_bfe_u32 v4, v0, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v5, 0x400000, v0 -; GFX940-NEXT: v_add3_u32 v4, v4, v0, s8 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v0, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v0, v1, s7, v0 -; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[0:1] -; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1 -; GFX940-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_cbranch_execnz .LBB14_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_bf16__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_addk_i32 s16, 0x200 +; GFX942-NEXT: s_and_b32 s4, s16, -4 +; GFX942-NEXT: v_mov_b32_e32 v2, s4 +; GFX942-NEXT: buffer_load_dword v1, v2, s[0:3], 0 offen +; GFX942-NEXT: s_and_b32 s4, s16, 3 +; GFX942-NEXT: s_lshl_b32 s6, s4, 3 +; GFX942-NEXT: s_lshl_b32 s4, 0xffff, s6 +; GFX942-NEXT: s_not_b32 s7, s4 +; GFX942-NEXT: s_mov_b64 s[4:5], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v0 +; GFX942-NEXT: s_movk_i32 s8, 0x7fff +; GFX942-NEXT: .LBB14_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v0, s6, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: v_max_f32_e32 v0, v0, v3 +; GFX942-NEXT: v_bfe_u32 v4, v0, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v5, 0x400000, v0 +; GFX942-NEXT: v_add3_u32 v4, v4, v0, s8 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v0, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v0, v1, s7, v0 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] +; GFX942-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1 +; GFX942-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_cbranch_execnz .LBB14_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_bf16__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -4755,80 +4755,80 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall__amd ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_u32_e32 v4, 0x200, v4 -; GFX940-NEXT: v_and_b32_e32 v9, -4, v4 -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v8, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v8, s0 -; GFX940-NEXT: v_not_b32_e32 v10, v4 -; GFX940-NEXT: s_mov_b64 s[2:3], exec -; GFX940-NEXT: .LBB15_1: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: v_readfirstlane_b32 s4, v0 -; GFX940-NEXT: v_readfirstlane_b32 s5, v1 -; GFX940-NEXT: v_readfirstlane_b32 s6, v2 -; GFX940-NEXT: v_readfirstlane_b32 s7, v3 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] -; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: buffer_load_dword v7, v9, s[4:7], 0 offen -; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB15_1 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: s_mov_b64 exec, s[2:3] -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v11, 16, v5 -; GFX940-NEXT: s_movk_i32 s10, 0x7fff -; GFX940-NEXT: .LBB15_3: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Loop Header: Depth=1 -; GFX940-NEXT: ; Child Loop BB15_4 Depth 2 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v4, v8, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_mov_b64 s[8:9], exec -; GFX940-NEXT: v_max_f32_e32 v4, v4, v11 -; GFX940-NEXT: v_bfe_u32 v5, v4, 16, 1 -; GFX940-NEXT: v_add3_u32 v5, v5, v4, s10 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v6, v7, v10, v4 -; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[6:7] -; GFX940-NEXT: .LBB15_4: ; Parent Loop BB15_3 Depth=1 -; GFX940-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX940-NEXT: v_readfirstlane_b32 s4, v0 -; GFX940-NEXT: v_readfirstlane_b32 s5, v1 -; GFX940-NEXT: v_readfirstlane_b32 s6, v2 -; GFX940-NEXT: v_readfirstlane_b32 s7, v3 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] -; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v9, s[4:7], 0 offen sc0 -; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB15_4 -; GFX940-NEXT: ; %bb.5: ; in Loop: Header=BB15_3 Depth=1 -; GFX940-NEXT: s_mov_b64 exec, s[8:9] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v7, v4 -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB15_3 -; GFX940-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v8, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_u32_e32 v4, 0x200, v4 +; GFX942-NEXT: v_and_b32_e32 v9, -4, v4 +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v8, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v8, s0 +; GFX942-NEXT: v_not_b32_e32 v10, v4 +; GFX942-NEXT: s_mov_b64 s[2:3], exec +; GFX942-NEXT: .LBB15_1: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: v_readfirstlane_b32 s4, v0 +; GFX942-NEXT: v_readfirstlane_b32 s5, v1 +; GFX942-NEXT: v_readfirstlane_b32 s6, v2 +; GFX942-NEXT: v_readfirstlane_b32 s7, v3 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] +; GFX942-NEXT: s_and_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: buffer_load_dword v7, v9, s[4:7], 0 offen +; GFX942-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB15_1 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: s_mov_b64 exec, s[2:3] +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v11, 16, v5 +; GFX942-NEXT: s_movk_i32 s10, 0x7fff +; GFX942-NEXT: .LBB15_3: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Loop Header: Depth=1 +; GFX942-NEXT: ; Child Loop BB15_4 Depth 2 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v4, v8, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_mov_b64 s[8:9], exec +; GFX942-NEXT: v_max_f32_e32 v4, v4, v11 +; GFX942-NEXT: v_bfe_u32 v5, v4, 16, 1 +; GFX942-NEXT: v_add3_u32 v5, v5, v4, s10 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v6, v7, v10, v4 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7] +; GFX942-NEXT: .LBB15_4: ; Parent Loop BB15_3 Depth=1 +; GFX942-NEXT: ; => This Inner Loop Header: Depth=2 +; GFX942-NEXT: v_readfirstlane_b32 s4, v0 +; GFX942-NEXT: v_readfirstlane_b32 s5, v1 +; GFX942-NEXT: v_readfirstlane_b32 s6, v2 +; GFX942-NEXT: v_readfirstlane_b32 s7, v3 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] +; GFX942-NEXT: s_and_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_atomic_cmpswap v[4:5], v9, s[4:7], 0 offen sc0 +; GFX942-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB15_4 +; GFX942-NEXT: ; %bb.5: ; in Loop: Header=BB15_3 Depth=1 +; GFX942-NEXT: s_mov_b64 exec, s[8:9] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v7, v4 +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB15_3 +; GFX942-NEXT: ; %bb.6: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v8, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -5402,35 +5402,35 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fmax_ret_v2f16__offset__amdgpu_no ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_v2f16__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v0, s16 -; GFX940-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024 -; GFX940-NEXT: s_add_i32 s6, s16, 0x400 -; GFX940-NEXT: s_mov_b64 s[4:5], 0 -; GFX940-NEXT: v_pk_max_f16 v2, v1, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, s6 -; GFX940-NEXT: .LBB16_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_pk_max_f16 v0, v5, v5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: v_pk_max_f16 v4, v0, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b64_e32 v[0:1], v[4:5] -; GFX940-NEXT: buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 -; GFX940-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_cbranch_execnz .LBB16_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_v2f16__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v0, s16 +; GFX942-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024 +; GFX942-NEXT: s_add_i32 s6, s16, 0x400 +; GFX942-NEXT: s_mov_b64 s[4:5], 0 +; GFX942-NEXT: v_pk_max_f16 v2, v1, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, s6 +; GFX942-NEXT: .LBB16_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_pk_max_f16 v0, v5, v5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: v_pk_max_f16 v4, v0, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5] +; GFX942-NEXT: buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 +; GFX942-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_cbranch_execnz .LBB16_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_v2f16__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -5720,34 +5720,34 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_v2f16__offset__amdgpu_no_fin ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_v2f16__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, s16 -; GFX940-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024 -; GFX940-NEXT: s_add_i32 s6, s16, 0x400 -; GFX940-NEXT: s_mov_b64 s[4:5], 0 -; GFX940-NEXT: v_pk_max_f16 v2, v0, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, s6 -; GFX940-NEXT: .LBB17_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v0, v1, v1 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: v_pk_max_f16 v0, v0, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[0:1] -; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v3, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1 -; GFX940-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_cbranch_execnz .LBB17_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_v2f16__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, s16 +; GFX942-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024 +; GFX942-NEXT: s_add_i32 s6, s16, 0x400 +; GFX942-NEXT: s_mov_b64 s[4:5], 0 +; GFX942-NEXT: v_pk_max_f16 v2, v0, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, s6 +; GFX942-NEXT: .LBB17_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v0, v1, v1 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: v_pk_max_f16 v0, v0, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] +; GFX942-NEXT: buffer_atomic_cmpswap v[4:5], v3, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1 +; GFX942-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_cbranch_execnz .LBB17_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_v2f16__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -6071,66 +6071,66 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fmax_ret_v2f16__offset__waterfall ; GFX12-NEXT: v_mov_b32_e32 v0, v4 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_v2f16__offset__waterfall__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_u32_e32 v8, 0x400, v4 -; GFX940-NEXT: s_mov_b64 s[2:3], exec -; GFX940-NEXT: .LBB18_1: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: v_readfirstlane_b32 s4, v0 -; GFX940-NEXT: v_readfirstlane_b32 s5, v1 -; GFX940-NEXT: v_readfirstlane_b32 s6, v2 -; GFX940-NEXT: v_readfirstlane_b32 s7, v3 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] -; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: buffer_load_dword v7, v4, s[4:7], 0 offen offset:1024 -; GFX940-NEXT: ; implicit-def: $vgpr4 -; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB18_1 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: s_mov_b64 exec, s[2:3] -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_pk_max_f16 v9, v5, v5 -; GFX940-NEXT: .LBB18_3: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Loop Header: Depth=1 -; GFX940-NEXT: ; Child Loop BB18_4 Depth 2 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v4, v7, v7 -; GFX940-NEXT: s_mov_b64 s[8:9], exec -; GFX940-NEXT: v_pk_max_f16 v6, v4, v9 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[6:7] -; GFX940-NEXT: .LBB18_4: ; Parent Loop BB18_3 Depth=1 -; GFX940-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX940-NEXT: v_readfirstlane_b32 s4, v0 -; GFX940-NEXT: v_readfirstlane_b32 s5, v1 -; GFX940-NEXT: v_readfirstlane_b32 s6, v2 -; GFX940-NEXT: v_readfirstlane_b32 s7, v3 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] -; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[4:7], 0 offen sc0 -; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB18_4 -; GFX940-NEXT: ; %bb.5: ; in Loop: Header=BB18_3 Depth=1 -; GFX940-NEXT: s_mov_b64 exec, s[8:9] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v7, v4 -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB18_3 -; GFX940-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_v2f16__offset__waterfall__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_u32_e32 v8, 0x400, v4 +; GFX942-NEXT: s_mov_b64 s[2:3], exec +; GFX942-NEXT: .LBB18_1: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: v_readfirstlane_b32 s4, v0 +; GFX942-NEXT: v_readfirstlane_b32 s5, v1 +; GFX942-NEXT: v_readfirstlane_b32 s6, v2 +; GFX942-NEXT: v_readfirstlane_b32 s7, v3 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] +; GFX942-NEXT: s_and_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: buffer_load_dword v7, v4, s[4:7], 0 offen offset:1024 +; GFX942-NEXT: ; implicit-def: $vgpr4 +; GFX942-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB18_1 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: s_mov_b64 exec, s[2:3] +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_pk_max_f16 v9, v5, v5 +; GFX942-NEXT: .LBB18_3: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Loop Header: Depth=1 +; GFX942-NEXT: ; Child Loop BB18_4 Depth 2 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v4, v7, v7 +; GFX942-NEXT: s_mov_b64 s[8:9], exec +; GFX942-NEXT: v_pk_max_f16 v6, v4, v9 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7] +; GFX942-NEXT: .LBB18_4: ; Parent Loop BB18_3 Depth=1 +; GFX942-NEXT: ; => This Inner Loop Header: Depth=2 +; GFX942-NEXT: v_readfirstlane_b32 s4, v0 +; GFX942-NEXT: v_readfirstlane_b32 s5, v1 +; GFX942-NEXT: v_readfirstlane_b32 s6, v2 +; GFX942-NEXT: v_readfirstlane_b32 s7, v3 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] +; GFX942-NEXT: s_and_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[4:7], 0 offen sc0 +; GFX942-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB18_4 +; GFX942-NEXT: ; %bb.5: ; in Loop: Header=BB18_3 Depth=1 +; GFX942-NEXT: s_mov_b64 exec, s[8:9] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v7, v4 +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB18_3 +; GFX942-NEXT: ; %bb.6: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_v2f16__offset__waterfall__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -6669,50 +6669,50 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__amdgpu ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v0, s16 -; GFX940-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024 -; GFX940-NEXT: s_add_i32 s4, s16, 0x400 -; GFX940-NEXT: s_mov_b64 s[6:7], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 -; GFX940-NEXT: s_movk_i32 s8, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v3, 0xffff0000, v1 -; GFX940-NEXT: s_mov_b32 s9, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, s4 -; GFX940-NEXT: .LBB19_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v7 -; GFX940-NEXT: v_and_b32_e32 v1, 0xffff0000, v7 -; GFX940-NEXT: v_max_f32_e32 v0, v0, v2 -; GFX940-NEXT: v_max_f32_e32 v1, v1, v3 -; GFX940-NEXT: v_bfe_u32 v5, v0, 16, 1 -; GFX940-NEXT: v_bfe_u32 v8, v1, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v0 -; GFX940-NEXT: v_or_b32_e32 v9, 0x400000, v1 -; GFX940-NEXT: v_add3_u32 v5, v5, v0, s8 -; GFX940-NEXT: v_add3_u32 v8, v8, v1, s8 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v1, v1 -; GFX940-NEXT: v_cmp_u_f32_e64 s[4:5], v0, v0 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v8, v9, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v0, v5, v6, s[4:5] -; GFX940-NEXT: v_perm_b32 v6, v1, v0, s9 -; GFX940-NEXT: v_mov_b64_e32 v[0:1], v[6:7] -; GFX940-NEXT: buffer_atomic_cmpswap v[0:1], v4, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v0, v7 -; GFX940-NEXT: s_or_b64 s[6:7], vcc, s[6:7] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[6:7] -; GFX940-NEXT: s_cbranch_execnz .LBB19_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v0, s16 +; GFX942-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024 +; GFX942-NEXT: s_add_i32 s4, s16, 0x400 +; GFX942-NEXT: s_mov_b64 s[6:7], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1 +; GFX942-NEXT: s_movk_i32 s8, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v3, 0xffff0000, v1 +; GFX942-NEXT: s_mov_b32 s9, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, s4 +; GFX942-NEXT: .LBB19_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v7 +; GFX942-NEXT: v_and_b32_e32 v1, 0xffff0000, v7 +; GFX942-NEXT: v_max_f32_e32 v0, v0, v2 +; GFX942-NEXT: v_max_f32_e32 v1, v1, v3 +; GFX942-NEXT: v_bfe_u32 v5, v0, 16, 1 +; GFX942-NEXT: v_bfe_u32 v8, v1, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v0 +; GFX942-NEXT: v_or_b32_e32 v9, 0x400000, v1 +; GFX942-NEXT: v_add3_u32 v5, v5, v0, s8 +; GFX942-NEXT: v_add3_u32 v8, v8, v1, s8 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v1, v1 +; GFX942-NEXT: v_cmp_u_f32_e64 s[4:5], v0, v0 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v8, v9, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v0, v5, v6, s[4:5] +; GFX942-NEXT: v_perm_b32 v6, v1, v0, s9 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7] +; GFX942-NEXT: buffer_atomic_cmpswap v[0:1], v4, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v0, v7 +; GFX942-NEXT: s_or_b64 s[6:7], vcc, s[6:7] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[6:7] +; GFX942-NEXT: s_cbranch_execnz .LBB19_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[6:7] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -7089,49 +7089,49 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_v2bf16__offset__amdgpu_no_fi ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_v2bf16__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, s16 -; GFX940-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024 -; GFX940-NEXT: s_add_i32 s4, s16, 0x400 -; GFX940-NEXT: s_mov_b64 s[6:7], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v0 -; GFX940-NEXT: s_movk_i32 s8, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v3, 0xffff0000, v0 -; GFX940-NEXT: s_mov_b32 s9, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, s4 -; GFX940-NEXT: .LBB20_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v1 -; GFX940-NEXT: v_max_f32_e32 v0, v0, v2 -; GFX940-NEXT: v_max_f32_e32 v5, v5, v3 -; GFX940-NEXT: v_bfe_u32 v6, v0, 16, 1 -; GFX940-NEXT: v_bfe_u32 v8, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v7, 0x400000, v0 -; GFX940-NEXT: v_or_b32_e32 v9, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v0, s8 -; GFX940-NEXT: v_add3_u32 v8, v8, v5, s8 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[4:5], v0, v0 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v0, v6, v7, s[4:5] -; GFX940-NEXT: v_perm_b32 v0, v5, v0, s9 -; GFX940-NEXT: v_mov_b64_e32 v[6:7], v[0:1] -; GFX940-NEXT: buffer_atomic_cmpswap v[6:7], v4, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v6, v1 -; GFX940-NEXT: s_or_b64 s[6:7], vcc, s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[6:7] -; GFX940-NEXT: s_cbranch_execnz .LBB20_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_v2bf16__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, s16 +; GFX942-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024 +; GFX942-NEXT: s_add_i32 s4, s16, 0x400 +; GFX942-NEXT: s_mov_b64 s[6:7], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v0 +; GFX942-NEXT: s_movk_i32 s8, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v3, 0xffff0000, v0 +; GFX942-NEXT: s_mov_b32 s9, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, s4 +; GFX942-NEXT: .LBB20_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v1 +; GFX942-NEXT: v_max_f32_e32 v0, v0, v2 +; GFX942-NEXT: v_max_f32_e32 v5, v5, v3 +; GFX942-NEXT: v_bfe_u32 v6, v0, 16, 1 +; GFX942-NEXT: v_bfe_u32 v8, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v7, 0x400000, v0 +; GFX942-NEXT: v_or_b32_e32 v9, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v0, s8 +; GFX942-NEXT: v_add3_u32 v8, v8, v5, s8 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[4:5], v0, v0 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v0, v6, v7, s[4:5] +; GFX942-NEXT: v_perm_b32 v0, v5, v0, s9 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1] +; GFX942-NEXT: buffer_atomic_cmpswap v[6:7], v4, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v6, v1 +; GFX942-NEXT: s_or_b64 s[6:7], vcc, s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[6:7] +; GFX942-NEXT: s_cbranch_execnz .LBB20_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[6:7] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_v2bf16__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -7542,83 +7542,83 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__waterf ; GFX12-NEXT: v_mov_b32_e32 v0, v4 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__waterfall__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_u32_e32 v8, 0x400, v4 -; GFX940-NEXT: s_mov_b64 s[2:3], exec -; GFX940-NEXT: .LBB21_1: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: v_readfirstlane_b32 s4, v0 -; GFX940-NEXT: v_readfirstlane_b32 s5, v1 -; GFX940-NEXT: v_readfirstlane_b32 s6, v2 -; GFX940-NEXT: v_readfirstlane_b32 s7, v3 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] -; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: buffer_load_dword v7, v4, s[4:7], 0 offen offset:1024 -; GFX940-NEXT: ; implicit-def: $vgpr4 -; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB21_1 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: s_mov_b64 exec, s[2:3] -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v9, 16, v5 -; GFX940-NEXT: s_movk_i32 s10, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v10, 0xffff0000, v5 -; GFX940-NEXT: s_mov_b32 s11, 0x7060302 -; GFX940-NEXT: .LBB21_3: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Loop Header: Depth=1 -; GFX940-NEXT: ; Child Loop BB21_4 Depth 2 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v7 -; GFX940-NEXT: v_max_f32_e32 v4, v4, v9 -; GFX940-NEXT: v_bfe_u32 v5, v4, 16, 1 -; GFX940-NEXT: v_add3_u32 v5, v5, v4, s10 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 -; GFX940-NEXT: s_mov_b64 s[8:9], exec -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 -; GFX940-NEXT: v_max_f32_e32 v5, v5, v10 -; GFX940-NEXT: v_bfe_u32 v6, v5, 16, 1 -; GFX940-NEXT: v_add3_u32 v6, v6, v5, s10 -; GFX940-NEXT: v_or_b32_e32 v11, 0x400000, v5 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v11, vcc -; GFX940-NEXT: v_perm_b32 v6, v5, v4, s11 -; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[6:7] -; GFX940-NEXT: .LBB21_4: ; Parent Loop BB21_3 Depth=1 -; GFX940-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX940-NEXT: v_readfirstlane_b32 s4, v0 -; GFX940-NEXT: v_readfirstlane_b32 s5, v1 -; GFX940-NEXT: v_readfirstlane_b32 s6, v2 -; GFX940-NEXT: v_readfirstlane_b32 s7, v3 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] -; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[4:7], 0 offen sc0 -; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB21_4 -; GFX940-NEXT: ; %bb.5: ; in Loop: Header=BB21_3 Depth=1 -; GFX940-NEXT: s_mov_b64 exec, s[8:9] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v7, v4 -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB21_3 -; GFX940-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__waterfall__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_u32_e32 v8, 0x400, v4 +; GFX942-NEXT: s_mov_b64 s[2:3], exec +; GFX942-NEXT: .LBB21_1: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: v_readfirstlane_b32 s4, v0 +; GFX942-NEXT: v_readfirstlane_b32 s5, v1 +; GFX942-NEXT: v_readfirstlane_b32 s6, v2 +; GFX942-NEXT: v_readfirstlane_b32 s7, v3 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] +; GFX942-NEXT: s_and_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: buffer_load_dword v7, v4, s[4:7], 0 offen offset:1024 +; GFX942-NEXT: ; implicit-def: $vgpr4 +; GFX942-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB21_1 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: s_mov_b64 exec, s[2:3] +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v9, 16, v5 +; GFX942-NEXT: s_movk_i32 s10, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v10, 0xffff0000, v5 +; GFX942-NEXT: s_mov_b32 s11, 0x7060302 +; GFX942-NEXT: .LBB21_3: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Loop Header: Depth=1 +; GFX942-NEXT: ; Child Loop BB21_4 Depth 2 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v7 +; GFX942-NEXT: v_max_f32_e32 v4, v4, v9 +; GFX942-NEXT: v_bfe_u32 v5, v4, 16, 1 +; GFX942-NEXT: v_add3_u32 v5, v5, v4, s10 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 +; GFX942-NEXT: s_mov_b64 s[8:9], exec +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 +; GFX942-NEXT: v_max_f32_e32 v5, v5, v10 +; GFX942-NEXT: v_bfe_u32 v6, v5, 16, 1 +; GFX942-NEXT: v_add3_u32 v6, v6, v5, s10 +; GFX942-NEXT: v_or_b32_e32 v11, 0x400000, v5 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v11, vcc +; GFX942-NEXT: v_perm_b32 v6, v5, v4, s11 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7] +; GFX942-NEXT: .LBB21_4: ; Parent Loop BB21_3 Depth=1 +; GFX942-NEXT: ; => This Inner Loop Header: Depth=2 +; GFX942-NEXT: v_readfirstlane_b32 s4, v0 +; GFX942-NEXT: v_readfirstlane_b32 s5, v1 +; GFX942-NEXT: v_readfirstlane_b32 s6, v2 +; GFX942-NEXT: v_readfirstlane_b32 s7, v3 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] +; GFX942-NEXT: s_and_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[4:7], 0 offen sc0 +; GFX942-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB21_4 +; GFX942-NEXT: ; %bb.5: ; in Loop: Header=BB21_3 Depth=1 +; GFX942-NEXT: s_mov_b64 exec, s[8:9] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v7, v4 +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB21_3 +; GFX942-NEXT: ; %bb.6: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__waterfall__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -8185,34 +8185,34 @@ define float @buffer_fat_ptr_system_atomic_fmax_ret_f32__offset__amdgpu_no_fine_ ; GFX12-NEXT: global_inv scope:SCOPE_SYS ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_system_atomic_fmax_ret_f32__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v0, s16 -; GFX940-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024 -; GFX940-NEXT: s_add_i32 s6, s16, 0x400 -; GFX940-NEXT: s_mov_b64 s[4:5], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v1, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, s6 -; GFX940-NEXT: .LBB22_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_max_f32_e32 v0, v5, v5 -; GFX940-NEXT: v_max_f32_e32 v4, v0, v2 -; GFX940-NEXT: v_mov_b64_e32 v[0:1], v[4:5] -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 -; GFX940-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_cbranch_execnz .LBB22_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_system_atomic_fmax_ret_f32__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v0, s16 +; GFX942-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024 +; GFX942-NEXT: s_add_i32 s6, s16, 0x400 +; GFX942-NEXT: s_mov_b64 s[4:5], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v1, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, s6 +; GFX942-NEXT: .LBB22_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_max_f32_e32 v0, v5, v5 +; GFX942-NEXT: v_max_f32_e32 v4, v0, v2 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5] +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 +; GFX942-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_cbranch_execnz .LBB22_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_system_atomic_fmax_ret_f32__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmin.ll b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmin.ll index a6eb81fcbf515..2b8cea9068d87 100644 --- a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmin.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmin.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s -; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s @@ -28,34 +28,34 @@ define float @buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__amdgpu_no_fine_g ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v0, s16 -; GFX940-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024 -; GFX940-NEXT: s_add_i32 s6, s16, 0x400 -; GFX940-NEXT: s_mov_b64 s[4:5], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v1, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, s6 -; GFX940-NEXT: .LBB0_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_max_f32_e32 v0, v5, v5 -; GFX940-NEXT: v_min_f32_e32 v4, v0, v2 -; GFX940-NEXT: v_mov_b64_e32 v[0:1], v[4:5] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 -; GFX940-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_cbranch_execnz .LBB0_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v0, s16 +; GFX942-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024 +; GFX942-NEXT: s_add_i32 s6, s16, 0x400 +; GFX942-NEXT: s_mov_b64 s[4:5], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v1, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, s6 +; GFX942-NEXT: .LBB0_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_max_f32_e32 v0, v5, v5 +; GFX942-NEXT: v_min_f32_e32 v4, v0, v2 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 +; GFX942-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_cbranch_execnz .LBB0_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -203,33 +203,33 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_f32__offset__amdgpu_no_fine_ ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f32__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, s16 -; GFX940-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024 -; GFX940-NEXT: s_add_i32 s6, s16, 0x400 -; GFX940-NEXT: s_mov_b64 s[4:5], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v0, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, s6 -; GFX940-NEXT: .LBB1_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v0, v1, v1 -; GFX940-NEXT: v_min_f32_e32 v0, v0, v2 -; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[0:1] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v3, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1 -; GFX940-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_cbranch_execnz .LBB1_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f32__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, s16 +; GFX942-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024 +; GFX942-NEXT: s_add_i32 s6, s16, 0x400 +; GFX942-NEXT: s_mov_b64 s[4:5], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v0, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, s6 +; GFX942-NEXT: .LBB1_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v0, v1, v1 +; GFX942-NEXT: v_min_f32_e32 v0, v0, v2 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_cmpswap v[4:5], v3, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1 +; GFX942-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_cbranch_execnz .LBB1_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f32__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -395,66 +395,66 @@ define float @buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__waterfall__amdgp ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__waterfall__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_u32_e32 v8, 0x400, v4 -; GFX940-NEXT: s_mov_b64 s[2:3], exec -; GFX940-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: v_readfirstlane_b32 s4, v0 -; GFX940-NEXT: v_readfirstlane_b32 s5, v1 -; GFX940-NEXT: v_readfirstlane_b32 s6, v2 -; GFX940-NEXT: v_readfirstlane_b32 s7, v3 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] -; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: buffer_load_dword v7, v4, s[4:7], 0 offen offset:1024 -; GFX940-NEXT: ; implicit-def: $vgpr4 -; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB2_1 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: s_mov_b64 exec, s[2:3] -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_max_f32_e32 v9, v5, v5 -; GFX940-NEXT: .LBB2_3: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Loop Header: Depth=1 -; GFX940-NEXT: ; Child Loop BB2_4 Depth 2 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v4, v7, v7 -; GFX940-NEXT: v_min_f32_e32 v6, v4, v9 -; GFX940-NEXT: s_mov_b64 s[8:9], exec -; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[6:7] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: .LBB2_4: ; Parent Loop BB2_3 Depth=1 -; GFX940-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX940-NEXT: v_readfirstlane_b32 s4, v0 -; GFX940-NEXT: v_readfirstlane_b32 s5, v1 -; GFX940-NEXT: v_readfirstlane_b32 s6, v2 -; GFX940-NEXT: v_readfirstlane_b32 s7, v3 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] -; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[4:7], 0 offen sc0 -; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB2_4 -; GFX940-NEXT: ; %bb.5: ; in Loop: Header=BB2_3 Depth=1 -; GFX940-NEXT: s_mov_b64 exec, s[8:9] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v7, v4 -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB2_3 -; GFX940-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__waterfall__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_u32_e32 v8, 0x400, v4 +; GFX942-NEXT: s_mov_b64 s[2:3], exec +; GFX942-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: v_readfirstlane_b32 s4, v0 +; GFX942-NEXT: v_readfirstlane_b32 s5, v1 +; GFX942-NEXT: v_readfirstlane_b32 s6, v2 +; GFX942-NEXT: v_readfirstlane_b32 s7, v3 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] +; GFX942-NEXT: s_and_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: buffer_load_dword v7, v4, s[4:7], 0 offen offset:1024 +; GFX942-NEXT: ; implicit-def: $vgpr4 +; GFX942-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB2_1 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: s_mov_b64 exec, s[2:3] +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_max_f32_e32 v9, v5, v5 +; GFX942-NEXT: .LBB2_3: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Loop Header: Depth=1 +; GFX942-NEXT: ; Child Loop BB2_4 Depth 2 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v4, v7, v7 +; GFX942-NEXT: v_min_f32_e32 v6, v4, v9 +; GFX942-NEXT: s_mov_b64 s[8:9], exec +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: .LBB2_4: ; Parent Loop BB2_3 Depth=1 +; GFX942-NEXT: ; => This Inner Loop Header: Depth=2 +; GFX942-NEXT: v_readfirstlane_b32 s4, v0 +; GFX942-NEXT: v_readfirstlane_b32 s5, v1 +; GFX942-NEXT: v_readfirstlane_b32 s6, v2 +; GFX942-NEXT: v_readfirstlane_b32 s7, v3 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] +; GFX942-NEXT: s_and_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[4:7], 0 offen sc0 +; GFX942-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB2_4 +; GFX942-NEXT: ; %bb.5: ; in Loop: Header=BB2_3 Depth=1 +; GFX942-NEXT: s_mov_b64 exec, s[8:9] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v7, v4 +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB2_3 +; GFX942-NEXT: ; %bb.6: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__waterfall__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -766,34 +766,34 @@ define float @buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__amdgpu_no_remote ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v0, s16 -; GFX940-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024 -; GFX940-NEXT: s_add_i32 s6, s16, 0x400 -; GFX940-NEXT: s_mov_b64 s[4:5], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v1, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, s6 -; GFX940-NEXT: .LBB3_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_max_f32_e32 v0, v5, v5 -; GFX940-NEXT: v_min_f32_e32 v4, v0, v2 -; GFX940-NEXT: v_mov_b64_e32 v[0:1], v[4:5] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 -; GFX940-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_cbranch_execnz .LBB3_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v0, s16 +; GFX942-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024 +; GFX942-NEXT: s_add_i32 s6, s16, 0x400 +; GFX942-NEXT: s_mov_b64 s[4:5], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v1, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, s6 +; GFX942-NEXT: .LBB3_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_max_f32_e32 v0, v5, v5 +; GFX942-NEXT: v_min_f32_e32 v4, v0, v2 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 +; GFX942-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_cbranch_execnz .LBB3_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -1023,34 +1023,34 @@ define float @buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__amdgpu_no_fine_g ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v0, s16 -; GFX940-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024 -; GFX940-NEXT: s_add_i32 s6, s16, 0x400 -; GFX940-NEXT: s_mov_b64 s[4:5], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v1, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, s6 -; GFX940-NEXT: .LBB4_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_max_f32_e32 v0, v5, v5 -; GFX940-NEXT: v_min_f32_e32 v4, v0, v2 -; GFX940-NEXT: v_mov_b64_e32 v[0:1], v[4:5] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 -; GFX940-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_cbranch_execnz .LBB4_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v0, s16 +; GFX942-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024 +; GFX942-NEXT: s_add_i32 s6, s16, 0x400 +; GFX942-NEXT: s_mov_b64 s[4:5], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v1, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, s6 +; GFX942-NEXT: .LBB4_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_max_f32_e32 v0, v5, v5 +; GFX942-NEXT: v_min_f32_e32 v4, v0, v2 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 +; GFX942-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_cbranch_execnz .LBB4_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -1226,15 +1226,15 @@ define double @buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__amdgpu_no_fine_ ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, s16 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, s16 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -1417,15 +1417,15 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_f64__offset__amdgpu_no_fine_ ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f64__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, s16 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f64__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, s16 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f64__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -1643,36 +1643,36 @@ define double @buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__waterfall__amdg ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__waterfall__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v6 -; GFX940-NEXT: v_mov_b32_e32 v6, v5 -; GFX940-NEXT: s_mov_b64 s[2:3], exec -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: v_readfirstlane_b32 s4, v0 -; GFX940-NEXT: v_readfirstlane_b32 s5, v1 -; GFX940-NEXT: v_readfirstlane_b32 s6, v2 -; GFX940-NEXT: v_readfirstlane_b32 s7, v3 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] -; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_atomic_min_f64 v[6:7], v4, s[4:7], 0 offen offset:2048 sc0 -; GFX940-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 -; GFX940-NEXT: ; implicit-def: $vgpr4 -; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB7_1 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: s_mov_b64 exec, s[2:3] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__waterfall__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v6 +; GFX942-NEXT: v_mov_b32_e32 v6, v5 +; GFX942-NEXT: s_mov_b64 s[2:3], exec +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: v_readfirstlane_b32 s4, v0 +; GFX942-NEXT: v_readfirstlane_b32 s5, v1 +; GFX942-NEXT: v_readfirstlane_b32 s6, v2 +; GFX942-NEXT: v_readfirstlane_b32 s7, v3 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] +; GFX942-NEXT: s_and_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_atomic_min_f64 v[6:7], v4, s[4:7], 0 offen offset:2048 sc0 +; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 +; GFX942-NEXT: ; implicit-def: $vgpr4 +; GFX942-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB7_1 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: s_mov_b64 exec, s[2:3] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__waterfall__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -2033,15 +2033,15 @@ define double @buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__amdgpu_no_remot ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, s16 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, s16 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -2320,15 +2320,15 @@ define double @buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__amdgpu_no_fine_ ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, s16 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, s16 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen offset:2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -2529,41 +2529,41 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__amdgpu_no_fine_gr ; GFX12-NEXT: v_lshrrev_b32_e32 v0, s4, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_addk_i32 s16, 0x200 -; GFX940-NEXT: s_and_b32 s4, s16, -4 -; GFX940-NEXT: v_mov_b32_e32 v4, s4 -; GFX940-NEXT: buffer_load_dword v1, v4, s[0:3], 0 offen -; GFX940-NEXT: s_and_b32 s4, s16, 3 -; GFX940-NEXT: s_lshl_b32 s6, s4, 3 -; GFX940-NEXT: s_lshl_b32 s4, 0xffff, s6 -; GFX940-NEXT: s_not_b32 s7, s4 -; GFX940-NEXT: s_mov_b64 s[4:5], 0 -; GFX940-NEXT: v_max_f16_e32 v5, v0, v0 -; GFX940-NEXT: .LBB10_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v0, s6, v1 -; GFX940-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX940-NEXT: v_min_f16_e32 v0, v0, v5 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, s6, v0 -; GFX940-NEXT: v_and_or_b32 v0, v1, s7, v0 -; GFX940-NEXT: v_mov_b64_e32 v[2:3], v[0:1] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_cmpswap v[2:3], v4, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 -; GFX940-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_cbranch_execnz .LBB10_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, s6, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_addk_i32 s16, 0x200 +; GFX942-NEXT: s_and_b32 s4, s16, -4 +; GFX942-NEXT: v_mov_b32_e32 v4, s4 +; GFX942-NEXT: buffer_load_dword v1, v4, s[0:3], 0 offen +; GFX942-NEXT: s_and_b32 s4, s16, 3 +; GFX942-NEXT: s_lshl_b32 s6, s4, 3 +; GFX942-NEXT: s_lshl_b32 s4, 0xffff, s6 +; GFX942-NEXT: s_not_b32 s7, s4 +; GFX942-NEXT: s_mov_b64 s[4:5], 0 +; GFX942-NEXT: v_max_f16_e32 v5, v0, v0 +; GFX942-NEXT: .LBB10_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v0, s6, v1 +; GFX942-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX942-NEXT: v_min_f16_e32 v0, v0, v5 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, s6, v0 +; GFX942-NEXT: v_and_or_b32 v0, v1, s7, v0 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_cmpswap v[2:3], v4, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 +; GFX942-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_cbranch_execnz .LBB10_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, s6, v2 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -2892,40 +2892,40 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_f16__offset__amdgpu_no_fine_ ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f16__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_addk_i32 s16, 0x200 -; GFX940-NEXT: s_and_b32 s4, s16, -4 -; GFX940-NEXT: v_mov_b32_e32 v2, s4 -; GFX940-NEXT: buffer_load_dword v1, v2, s[0:3], 0 offen -; GFX940-NEXT: s_and_b32 s4, s16, 3 -; GFX940-NEXT: s_lshl_b32 s6, s4, 3 -; GFX940-NEXT: s_lshl_b32 s4, 0xffff, s6 -; GFX940-NEXT: s_not_b32 s7, s4 -; GFX940-NEXT: s_mov_b64 s[4:5], 0 -; GFX940-NEXT: v_max_f16_e32 v3, v0, v0 -; GFX940-NEXT: .LBB11_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v0, s6, v1 -; GFX940-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX940-NEXT: v_min_f16_e32 v0, v0, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, s6, v0 -; GFX940-NEXT: v_and_or_b32 v0, v1, s7, v0 -; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[0:1] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1 -; GFX940-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_cbranch_execnz .LBB11_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f16__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_addk_i32 s16, 0x200 +; GFX942-NEXT: s_and_b32 s4, s16, -4 +; GFX942-NEXT: v_mov_b32_e32 v2, s4 +; GFX942-NEXT: buffer_load_dword v1, v2, s[0:3], 0 offen +; GFX942-NEXT: s_and_b32 s4, s16, 3 +; GFX942-NEXT: s_lshl_b32 s6, s4, 3 +; GFX942-NEXT: s_lshl_b32 s4, 0xffff, s6 +; GFX942-NEXT: s_not_b32 s7, s4 +; GFX942-NEXT: s_mov_b64 s[4:5], 0 +; GFX942-NEXT: v_max_f16_e32 v3, v0, v0 +; GFX942-NEXT: .LBB11_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v0, s6, v1 +; GFX942-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX942-NEXT: v_min_f16_e32 v0, v0, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, s6, v0 +; GFX942-NEXT: v_and_or_b32 v0, v1, s7, v0 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1 +; GFX942-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_cbranch_execnz .LBB11_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f16__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -3283,74 +3283,74 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall__amdgpu ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_u32_e32 v4, 0x200, v4 -; GFX940-NEXT: v_and_b32_e32 v9, -4, v4 -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v8, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v8, s0 -; GFX940-NEXT: v_not_b32_e32 v10, v4 -; GFX940-NEXT: s_mov_b64 s[2:3], exec -; GFX940-NEXT: .LBB12_1: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: v_readfirstlane_b32 s4, v0 -; GFX940-NEXT: v_readfirstlane_b32 s5, v1 -; GFX940-NEXT: v_readfirstlane_b32 s6, v2 -; GFX940-NEXT: v_readfirstlane_b32 s7, v3 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] -; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: buffer_load_dword v7, v9, s[4:7], 0 offen -; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB12_1 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: s_mov_b64 exec, s[2:3] -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_max_f16_e32 v11, v5, v5 -; GFX940-NEXT: .LBB12_3: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Loop Header: Depth=1 -; GFX940-NEXT: ; Child Loop BB12_4 Depth 2 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v8, v7 -; GFX940-NEXT: v_max_f16_e32 v4, v4, v4 -; GFX940-NEXT: v_min_f16_e32 v4, v4, v11 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v8, v4 -; GFX940-NEXT: v_and_or_b32 v6, v7, v10, v4 -; GFX940-NEXT: s_mov_b64 s[8:9], exec -; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[6:7] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: .LBB12_4: ; Parent Loop BB12_3 Depth=1 -; GFX940-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX940-NEXT: v_readfirstlane_b32 s4, v0 -; GFX940-NEXT: v_readfirstlane_b32 s5, v1 -; GFX940-NEXT: v_readfirstlane_b32 s6, v2 -; GFX940-NEXT: v_readfirstlane_b32 s7, v3 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] -; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v9, s[4:7], 0 offen sc0 -; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB12_4 -; GFX940-NEXT: ; %bb.5: ; in Loop: Header=BB12_3 Depth=1 -; GFX940-NEXT: s_mov_b64 exec, s[8:9] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v7, v4 -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB12_3 -; GFX940-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v8, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_u32_e32 v4, 0x200, v4 +; GFX942-NEXT: v_and_b32_e32 v9, -4, v4 +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v8, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v8, s0 +; GFX942-NEXT: v_not_b32_e32 v10, v4 +; GFX942-NEXT: s_mov_b64 s[2:3], exec +; GFX942-NEXT: .LBB12_1: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: v_readfirstlane_b32 s4, v0 +; GFX942-NEXT: v_readfirstlane_b32 s5, v1 +; GFX942-NEXT: v_readfirstlane_b32 s6, v2 +; GFX942-NEXT: v_readfirstlane_b32 s7, v3 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] +; GFX942-NEXT: s_and_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: buffer_load_dword v7, v9, s[4:7], 0 offen +; GFX942-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB12_1 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: s_mov_b64 exec, s[2:3] +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_max_f16_e32 v11, v5, v5 +; GFX942-NEXT: .LBB12_3: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Loop Header: Depth=1 +; GFX942-NEXT: ; Child Loop BB12_4 Depth 2 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v8, v7 +; GFX942-NEXT: v_max_f16_e32 v4, v4, v4 +; GFX942-NEXT: v_min_f16_e32 v4, v4, v11 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v8, v4 +; GFX942-NEXT: v_and_or_b32 v6, v7, v10, v4 +; GFX942-NEXT: s_mov_b64 s[8:9], exec +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: .LBB12_4: ; Parent Loop BB12_3 Depth=1 +; GFX942-NEXT: ; => This Inner Loop Header: Depth=2 +; GFX942-NEXT: v_readfirstlane_b32 s4, v0 +; GFX942-NEXT: v_readfirstlane_b32 s5, v1 +; GFX942-NEXT: v_readfirstlane_b32 s6, v2 +; GFX942-NEXT: v_readfirstlane_b32 s7, v3 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] +; GFX942-NEXT: s_and_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_atomic_cmpswap v[4:5], v9, s[4:7], 0 offen sc0 +; GFX942-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB12_4 +; GFX942-NEXT: ; %bb.5: ; in Loop: Header=BB12_3 Depth=1 +; GFX942-NEXT: s_mov_b64 exec, s[8:9] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v7, v4 +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB12_3 +; GFX942-NEXT: ; %bb.6: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v8, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -3915,47 +3915,47 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__amdgpu_no_fine ; GFX12-NEXT: v_lshrrev_b32_e32 v0, s4, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_addk_i32 s16, 0x200 -; GFX940-NEXT: s_and_b32 s4, s16, -4 -; GFX940-NEXT: v_mov_b32_e32 v4, s4 -; GFX940-NEXT: buffer_load_dword v1, v4, s[0:3], 0 offen -; GFX940-NEXT: s_and_b32 s4, s16, 3 -; GFX940-NEXT: s_lshl_b32 s6, s4, 3 -; GFX940-NEXT: s_lshl_b32 s4, 0xffff, s6 -; GFX940-NEXT: s_not_b32 s7, s4 -; GFX940-NEXT: s_mov_b64 s[4:5], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v5, 16, v0 -; GFX940-NEXT: s_movk_i32 s8, 0x7fff -; GFX940-NEXT: .LBB13_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v0, s6, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: v_min_f32_e32 v0, v0, v5 -; GFX940-NEXT: v_bfe_u32 v2, v0, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v3, 0x400000, v0 -; GFX940-NEXT: v_add3_u32 v2, v2, v0, s8 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v0, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v0, v1, s7, v0 -; GFX940-NEXT: v_mov_b64_e32 v[2:3], v[0:1] -; GFX940-NEXT: buffer_atomic_cmpswap v[2:3], v4, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 -; GFX940-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_cbranch_execnz .LBB13_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, s6, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_addk_i32 s16, 0x200 +; GFX942-NEXT: s_and_b32 s4, s16, -4 +; GFX942-NEXT: v_mov_b32_e32 v4, s4 +; GFX942-NEXT: buffer_load_dword v1, v4, s[0:3], 0 offen +; GFX942-NEXT: s_and_b32 s4, s16, 3 +; GFX942-NEXT: s_lshl_b32 s6, s4, 3 +; GFX942-NEXT: s_lshl_b32 s4, 0xffff, s6 +; GFX942-NEXT: s_not_b32 s7, s4 +; GFX942-NEXT: s_mov_b64 s[4:5], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v5, 16, v0 +; GFX942-NEXT: s_movk_i32 s8, 0x7fff +; GFX942-NEXT: .LBB13_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v0, s6, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: v_min_f32_e32 v0, v0, v5 +; GFX942-NEXT: v_bfe_u32 v2, v0, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v3, 0x400000, v0 +; GFX942-NEXT: v_add3_u32 v2, v2, v0, s8 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v0, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v0, v1, s7, v0 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1] +; GFX942-NEXT: buffer_atomic_cmpswap v[2:3], v4, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 +; GFX942-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_cbranch_execnz .LBB13_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, s6, v2 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -4321,46 +4321,46 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_bf16__offset__amdgpu_no_fine ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_bf16__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_addk_i32 s16, 0x200 -; GFX940-NEXT: s_and_b32 s4, s16, -4 -; GFX940-NEXT: v_mov_b32_e32 v2, s4 -; GFX940-NEXT: buffer_load_dword v1, v2, s[0:3], 0 offen -; GFX940-NEXT: s_and_b32 s4, s16, 3 -; GFX940-NEXT: s_lshl_b32 s6, s4, 3 -; GFX940-NEXT: s_lshl_b32 s4, 0xffff, s6 -; GFX940-NEXT: s_not_b32 s7, s4 -; GFX940-NEXT: s_mov_b64 s[4:5], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX940-NEXT: s_movk_i32 s8, 0x7fff -; GFX940-NEXT: .LBB14_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v0, s6, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: v_min_f32_e32 v0, v0, v3 -; GFX940-NEXT: v_bfe_u32 v4, v0, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v5, 0x400000, v0 -; GFX940-NEXT: v_add3_u32 v4, v4, v0, s8 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v0, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v0, v1, s7, v0 -; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[0:1] -; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1 -; GFX940-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_cbranch_execnz .LBB14_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_bf16__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_addk_i32 s16, 0x200 +; GFX942-NEXT: s_and_b32 s4, s16, -4 +; GFX942-NEXT: v_mov_b32_e32 v2, s4 +; GFX942-NEXT: buffer_load_dword v1, v2, s[0:3], 0 offen +; GFX942-NEXT: s_and_b32 s4, s16, 3 +; GFX942-NEXT: s_lshl_b32 s6, s4, 3 +; GFX942-NEXT: s_lshl_b32 s4, 0xffff, s6 +; GFX942-NEXT: s_not_b32 s7, s4 +; GFX942-NEXT: s_mov_b64 s[4:5], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v0 +; GFX942-NEXT: s_movk_i32 s8, 0x7fff +; GFX942-NEXT: .LBB14_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v0, s6, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: v_min_f32_e32 v0, v0, v3 +; GFX942-NEXT: v_bfe_u32 v4, v0, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v5, 0x400000, v0 +; GFX942-NEXT: v_add3_u32 v4, v4, v0, s8 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v0, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v0, v1, s7, v0 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] +; GFX942-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1 +; GFX942-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_cbranch_execnz .LBB14_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_bf16__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -4755,80 +4755,80 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall__amd ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_u32_e32 v4, 0x200, v4 -; GFX940-NEXT: v_and_b32_e32 v9, -4, v4 -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v8, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v8, s0 -; GFX940-NEXT: v_not_b32_e32 v10, v4 -; GFX940-NEXT: s_mov_b64 s[2:3], exec -; GFX940-NEXT: .LBB15_1: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: v_readfirstlane_b32 s4, v0 -; GFX940-NEXT: v_readfirstlane_b32 s5, v1 -; GFX940-NEXT: v_readfirstlane_b32 s6, v2 -; GFX940-NEXT: v_readfirstlane_b32 s7, v3 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] -; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: buffer_load_dword v7, v9, s[4:7], 0 offen -; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB15_1 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: s_mov_b64 exec, s[2:3] -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v11, 16, v5 -; GFX940-NEXT: s_movk_i32 s10, 0x7fff -; GFX940-NEXT: .LBB15_3: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Loop Header: Depth=1 -; GFX940-NEXT: ; Child Loop BB15_4 Depth 2 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v4, v8, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_mov_b64 s[8:9], exec -; GFX940-NEXT: v_min_f32_e32 v4, v4, v11 -; GFX940-NEXT: v_bfe_u32 v5, v4, 16, 1 -; GFX940-NEXT: v_add3_u32 v5, v5, v4, s10 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v6, v7, v10, v4 -; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[6:7] -; GFX940-NEXT: .LBB15_4: ; Parent Loop BB15_3 Depth=1 -; GFX940-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX940-NEXT: v_readfirstlane_b32 s4, v0 -; GFX940-NEXT: v_readfirstlane_b32 s5, v1 -; GFX940-NEXT: v_readfirstlane_b32 s6, v2 -; GFX940-NEXT: v_readfirstlane_b32 s7, v3 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] -; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v9, s[4:7], 0 offen sc0 -; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB15_4 -; GFX940-NEXT: ; %bb.5: ; in Loop: Header=BB15_3 Depth=1 -; GFX940-NEXT: s_mov_b64 exec, s[8:9] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v7, v4 -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB15_3 -; GFX940-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v8, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_u32_e32 v4, 0x200, v4 +; GFX942-NEXT: v_and_b32_e32 v9, -4, v4 +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v8, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v8, s0 +; GFX942-NEXT: v_not_b32_e32 v10, v4 +; GFX942-NEXT: s_mov_b64 s[2:3], exec +; GFX942-NEXT: .LBB15_1: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: v_readfirstlane_b32 s4, v0 +; GFX942-NEXT: v_readfirstlane_b32 s5, v1 +; GFX942-NEXT: v_readfirstlane_b32 s6, v2 +; GFX942-NEXT: v_readfirstlane_b32 s7, v3 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] +; GFX942-NEXT: s_and_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: buffer_load_dword v7, v9, s[4:7], 0 offen +; GFX942-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB15_1 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: s_mov_b64 exec, s[2:3] +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v11, 16, v5 +; GFX942-NEXT: s_movk_i32 s10, 0x7fff +; GFX942-NEXT: .LBB15_3: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Loop Header: Depth=1 +; GFX942-NEXT: ; Child Loop BB15_4 Depth 2 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v4, v8, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_mov_b64 s[8:9], exec +; GFX942-NEXT: v_min_f32_e32 v4, v4, v11 +; GFX942-NEXT: v_bfe_u32 v5, v4, 16, 1 +; GFX942-NEXT: v_add3_u32 v5, v5, v4, s10 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v6, v7, v10, v4 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7] +; GFX942-NEXT: .LBB15_4: ; Parent Loop BB15_3 Depth=1 +; GFX942-NEXT: ; => This Inner Loop Header: Depth=2 +; GFX942-NEXT: v_readfirstlane_b32 s4, v0 +; GFX942-NEXT: v_readfirstlane_b32 s5, v1 +; GFX942-NEXT: v_readfirstlane_b32 s6, v2 +; GFX942-NEXT: v_readfirstlane_b32 s7, v3 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] +; GFX942-NEXT: s_and_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_atomic_cmpswap v[4:5], v9, s[4:7], 0 offen sc0 +; GFX942-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB15_4 +; GFX942-NEXT: ; %bb.5: ; in Loop: Header=BB15_3 Depth=1 +; GFX942-NEXT: s_mov_b64 exec, s[8:9] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v7, v4 +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB15_3 +; GFX942-NEXT: ; %bb.6: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v8, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -5402,35 +5402,35 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fmin_ret_v2f16__offset__amdgpu_no ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_v2f16__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v0, s16 -; GFX940-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024 -; GFX940-NEXT: s_add_i32 s6, s16, 0x400 -; GFX940-NEXT: s_mov_b64 s[4:5], 0 -; GFX940-NEXT: v_pk_max_f16 v2, v1, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, s6 -; GFX940-NEXT: .LBB16_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_pk_max_f16 v0, v5, v5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: v_pk_min_f16 v4, v0, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b64_e32 v[0:1], v[4:5] -; GFX940-NEXT: buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 -; GFX940-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_cbranch_execnz .LBB16_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_v2f16__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v0, s16 +; GFX942-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024 +; GFX942-NEXT: s_add_i32 s6, s16, 0x400 +; GFX942-NEXT: s_mov_b64 s[4:5], 0 +; GFX942-NEXT: v_pk_max_f16 v2, v1, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, s6 +; GFX942-NEXT: .LBB16_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_pk_max_f16 v0, v5, v5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: v_pk_min_f16 v4, v0, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5] +; GFX942-NEXT: buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 +; GFX942-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_cbranch_execnz .LBB16_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_v2f16__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -5720,34 +5720,34 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_v2f16__offset__amdgpu_no_fin ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_v2f16__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, s16 -; GFX940-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024 -; GFX940-NEXT: s_add_i32 s6, s16, 0x400 -; GFX940-NEXT: s_mov_b64 s[4:5], 0 -; GFX940-NEXT: v_pk_max_f16 v2, v0, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, s6 -; GFX940-NEXT: .LBB17_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v0, v1, v1 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: v_pk_min_f16 v0, v0, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[0:1] -; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v3, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1 -; GFX940-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_cbranch_execnz .LBB17_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_v2f16__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, s16 +; GFX942-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024 +; GFX942-NEXT: s_add_i32 s6, s16, 0x400 +; GFX942-NEXT: s_mov_b64 s[4:5], 0 +; GFX942-NEXT: v_pk_max_f16 v2, v0, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, s6 +; GFX942-NEXT: .LBB17_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v0, v1, v1 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: v_pk_min_f16 v0, v0, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] +; GFX942-NEXT: buffer_atomic_cmpswap v[4:5], v3, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v1 +; GFX942-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_cbranch_execnz .LBB17_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_v2f16__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -6071,66 +6071,66 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fmin_ret_v2f16__offset__waterfall ; GFX12-NEXT: v_mov_b32_e32 v0, v4 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_v2f16__offset__waterfall__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_u32_e32 v8, 0x400, v4 -; GFX940-NEXT: s_mov_b64 s[2:3], exec -; GFX940-NEXT: .LBB18_1: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: v_readfirstlane_b32 s4, v0 -; GFX940-NEXT: v_readfirstlane_b32 s5, v1 -; GFX940-NEXT: v_readfirstlane_b32 s6, v2 -; GFX940-NEXT: v_readfirstlane_b32 s7, v3 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] -; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: buffer_load_dword v7, v4, s[4:7], 0 offen offset:1024 -; GFX940-NEXT: ; implicit-def: $vgpr4 -; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB18_1 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: s_mov_b64 exec, s[2:3] -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_pk_max_f16 v9, v5, v5 -; GFX940-NEXT: .LBB18_3: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Loop Header: Depth=1 -; GFX940-NEXT: ; Child Loop BB18_4 Depth 2 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v4, v7, v7 -; GFX940-NEXT: s_mov_b64 s[8:9], exec -; GFX940-NEXT: v_pk_min_f16 v6, v4, v9 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[6:7] -; GFX940-NEXT: .LBB18_4: ; Parent Loop BB18_3 Depth=1 -; GFX940-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX940-NEXT: v_readfirstlane_b32 s4, v0 -; GFX940-NEXT: v_readfirstlane_b32 s5, v1 -; GFX940-NEXT: v_readfirstlane_b32 s6, v2 -; GFX940-NEXT: v_readfirstlane_b32 s7, v3 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] -; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[4:7], 0 offen sc0 -; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB18_4 -; GFX940-NEXT: ; %bb.5: ; in Loop: Header=BB18_3 Depth=1 -; GFX940-NEXT: s_mov_b64 exec, s[8:9] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v7, v4 -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB18_3 -; GFX940-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_v2f16__offset__waterfall__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_u32_e32 v8, 0x400, v4 +; GFX942-NEXT: s_mov_b64 s[2:3], exec +; GFX942-NEXT: .LBB18_1: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: v_readfirstlane_b32 s4, v0 +; GFX942-NEXT: v_readfirstlane_b32 s5, v1 +; GFX942-NEXT: v_readfirstlane_b32 s6, v2 +; GFX942-NEXT: v_readfirstlane_b32 s7, v3 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] +; GFX942-NEXT: s_and_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: buffer_load_dword v7, v4, s[4:7], 0 offen offset:1024 +; GFX942-NEXT: ; implicit-def: $vgpr4 +; GFX942-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB18_1 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: s_mov_b64 exec, s[2:3] +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_pk_max_f16 v9, v5, v5 +; GFX942-NEXT: .LBB18_3: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Loop Header: Depth=1 +; GFX942-NEXT: ; Child Loop BB18_4 Depth 2 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v4, v7, v7 +; GFX942-NEXT: s_mov_b64 s[8:9], exec +; GFX942-NEXT: v_pk_min_f16 v6, v4, v9 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7] +; GFX942-NEXT: .LBB18_4: ; Parent Loop BB18_3 Depth=1 +; GFX942-NEXT: ; => This Inner Loop Header: Depth=2 +; GFX942-NEXT: v_readfirstlane_b32 s4, v0 +; GFX942-NEXT: v_readfirstlane_b32 s5, v1 +; GFX942-NEXT: v_readfirstlane_b32 s6, v2 +; GFX942-NEXT: v_readfirstlane_b32 s7, v3 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] +; GFX942-NEXT: s_and_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[4:7], 0 offen sc0 +; GFX942-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB18_4 +; GFX942-NEXT: ; %bb.5: ; in Loop: Header=BB18_3 Depth=1 +; GFX942-NEXT: s_mov_b64 exec, s[8:9] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v7, v4 +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB18_3 +; GFX942-NEXT: ; %bb.6: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_v2f16__offset__waterfall__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -6669,50 +6669,50 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__amdgpu ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v0, s16 -; GFX940-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024 -; GFX940-NEXT: s_add_i32 s4, s16, 0x400 -; GFX940-NEXT: s_mov_b64 s[6:7], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 -; GFX940-NEXT: s_movk_i32 s8, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v3, 0xffff0000, v1 -; GFX940-NEXT: s_mov_b32 s9, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, s4 -; GFX940-NEXT: .LBB19_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v7 -; GFX940-NEXT: v_and_b32_e32 v1, 0xffff0000, v7 -; GFX940-NEXT: v_min_f32_e32 v0, v0, v2 -; GFX940-NEXT: v_min_f32_e32 v1, v1, v3 -; GFX940-NEXT: v_bfe_u32 v5, v0, 16, 1 -; GFX940-NEXT: v_bfe_u32 v8, v1, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v0 -; GFX940-NEXT: v_or_b32_e32 v9, 0x400000, v1 -; GFX940-NEXT: v_add3_u32 v5, v5, v0, s8 -; GFX940-NEXT: v_add3_u32 v8, v8, v1, s8 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v1, v1 -; GFX940-NEXT: v_cmp_u_f32_e64 s[4:5], v0, v0 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v8, v9, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v0, v5, v6, s[4:5] -; GFX940-NEXT: v_perm_b32 v6, v1, v0, s9 -; GFX940-NEXT: v_mov_b64_e32 v[0:1], v[6:7] -; GFX940-NEXT: buffer_atomic_cmpswap v[0:1], v4, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v0, v7 -; GFX940-NEXT: s_or_b64 s[6:7], vcc, s[6:7] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[6:7] -; GFX940-NEXT: s_cbranch_execnz .LBB19_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v0, s16 +; GFX942-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024 +; GFX942-NEXT: s_add_i32 s4, s16, 0x400 +; GFX942-NEXT: s_mov_b64 s[6:7], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1 +; GFX942-NEXT: s_movk_i32 s8, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v3, 0xffff0000, v1 +; GFX942-NEXT: s_mov_b32 s9, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, s4 +; GFX942-NEXT: .LBB19_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v7 +; GFX942-NEXT: v_and_b32_e32 v1, 0xffff0000, v7 +; GFX942-NEXT: v_min_f32_e32 v0, v0, v2 +; GFX942-NEXT: v_min_f32_e32 v1, v1, v3 +; GFX942-NEXT: v_bfe_u32 v5, v0, 16, 1 +; GFX942-NEXT: v_bfe_u32 v8, v1, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v0 +; GFX942-NEXT: v_or_b32_e32 v9, 0x400000, v1 +; GFX942-NEXT: v_add3_u32 v5, v5, v0, s8 +; GFX942-NEXT: v_add3_u32 v8, v8, v1, s8 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v1, v1 +; GFX942-NEXT: v_cmp_u_f32_e64 s[4:5], v0, v0 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v8, v9, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v0, v5, v6, s[4:5] +; GFX942-NEXT: v_perm_b32 v6, v1, v0, s9 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7] +; GFX942-NEXT: buffer_atomic_cmpswap v[0:1], v4, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v0, v7 +; GFX942-NEXT: s_or_b64 s[6:7], vcc, s[6:7] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[6:7] +; GFX942-NEXT: s_cbranch_execnz .LBB19_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[6:7] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -7089,49 +7089,49 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_v2bf16__offset__amdgpu_no_fi ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_v2bf16__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, s16 -; GFX940-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024 -; GFX940-NEXT: s_add_i32 s4, s16, 0x400 -; GFX940-NEXT: s_mov_b64 s[6:7], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v0 -; GFX940-NEXT: s_movk_i32 s8, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v3, 0xffff0000, v0 -; GFX940-NEXT: s_mov_b32 s9, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, s4 -; GFX940-NEXT: .LBB20_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v1 -; GFX940-NEXT: v_min_f32_e32 v0, v0, v2 -; GFX940-NEXT: v_min_f32_e32 v5, v5, v3 -; GFX940-NEXT: v_bfe_u32 v6, v0, 16, 1 -; GFX940-NEXT: v_bfe_u32 v8, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v7, 0x400000, v0 -; GFX940-NEXT: v_or_b32_e32 v9, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v0, s8 -; GFX940-NEXT: v_add3_u32 v8, v8, v5, s8 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[4:5], v0, v0 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v0, v6, v7, s[4:5] -; GFX940-NEXT: v_perm_b32 v0, v5, v0, s9 -; GFX940-NEXT: v_mov_b64_e32 v[6:7], v[0:1] -; GFX940-NEXT: buffer_atomic_cmpswap v[6:7], v4, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v6, v1 -; GFX940-NEXT: s_or_b64 s[6:7], vcc, s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[6:7] -; GFX940-NEXT: s_cbranch_execnz .LBB20_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_v2bf16__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, s16 +; GFX942-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:1024 +; GFX942-NEXT: s_add_i32 s4, s16, 0x400 +; GFX942-NEXT: s_mov_b64 s[6:7], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v0 +; GFX942-NEXT: s_movk_i32 s8, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v3, 0xffff0000, v0 +; GFX942-NEXT: s_mov_b32 s9, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, s4 +; GFX942-NEXT: .LBB20_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v1 +; GFX942-NEXT: v_min_f32_e32 v0, v0, v2 +; GFX942-NEXT: v_min_f32_e32 v5, v5, v3 +; GFX942-NEXT: v_bfe_u32 v6, v0, 16, 1 +; GFX942-NEXT: v_bfe_u32 v8, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v7, 0x400000, v0 +; GFX942-NEXT: v_or_b32_e32 v9, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v0, s8 +; GFX942-NEXT: v_add3_u32 v8, v8, v5, s8 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[4:5], v0, v0 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v0, v6, v7, s[4:5] +; GFX942-NEXT: v_perm_b32 v0, v5, v0, s9 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1] +; GFX942-NEXT: buffer_atomic_cmpswap v[6:7], v4, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v6, v1 +; GFX942-NEXT: s_or_b64 s[6:7], vcc, s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[6:7] +; GFX942-NEXT: s_cbranch_execnz .LBB20_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[6:7] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_v2bf16__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -7542,83 +7542,83 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__waterf ; GFX12-NEXT: v_mov_b32_e32 v0, v4 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__waterfall__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_u32_e32 v8, 0x400, v4 -; GFX940-NEXT: s_mov_b64 s[2:3], exec -; GFX940-NEXT: .LBB21_1: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: v_readfirstlane_b32 s4, v0 -; GFX940-NEXT: v_readfirstlane_b32 s5, v1 -; GFX940-NEXT: v_readfirstlane_b32 s6, v2 -; GFX940-NEXT: v_readfirstlane_b32 s7, v3 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] -; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: buffer_load_dword v7, v4, s[4:7], 0 offen offset:1024 -; GFX940-NEXT: ; implicit-def: $vgpr4 -; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB21_1 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: s_mov_b64 exec, s[2:3] -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v9, 16, v5 -; GFX940-NEXT: s_movk_i32 s10, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v10, 0xffff0000, v5 -; GFX940-NEXT: s_mov_b32 s11, 0x7060302 -; GFX940-NEXT: .LBB21_3: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Loop Header: Depth=1 -; GFX940-NEXT: ; Child Loop BB21_4 Depth 2 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v7 -; GFX940-NEXT: v_min_f32_e32 v4, v4, v9 -; GFX940-NEXT: v_bfe_u32 v5, v4, 16, 1 -; GFX940-NEXT: v_add3_u32 v5, v5, v4, s10 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 -; GFX940-NEXT: s_mov_b64 s[8:9], exec -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 -; GFX940-NEXT: v_min_f32_e32 v5, v5, v10 -; GFX940-NEXT: v_bfe_u32 v6, v5, 16, 1 -; GFX940-NEXT: v_add3_u32 v6, v6, v5, s10 -; GFX940-NEXT: v_or_b32_e32 v11, 0x400000, v5 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v11, vcc -; GFX940-NEXT: v_perm_b32 v6, v5, v4, s11 -; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[6:7] -; GFX940-NEXT: .LBB21_4: ; Parent Loop BB21_3 Depth=1 -; GFX940-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX940-NEXT: v_readfirstlane_b32 s4, v0 -; GFX940-NEXT: v_readfirstlane_b32 s5, v1 -; GFX940-NEXT: v_readfirstlane_b32 s6, v2 -; GFX940-NEXT: v_readfirstlane_b32 s7, v3 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] -; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[4:7], 0 offen sc0 -; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB21_4 -; GFX940-NEXT: ; %bb.5: ; in Loop: Header=BB21_3 Depth=1 -; GFX940-NEXT: s_mov_b64 exec, s[8:9] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v7, v4 -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB21_3 -; GFX940-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__waterfall__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_u32_e32 v8, 0x400, v4 +; GFX942-NEXT: s_mov_b64 s[2:3], exec +; GFX942-NEXT: .LBB21_1: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: v_readfirstlane_b32 s4, v0 +; GFX942-NEXT: v_readfirstlane_b32 s5, v1 +; GFX942-NEXT: v_readfirstlane_b32 s6, v2 +; GFX942-NEXT: v_readfirstlane_b32 s7, v3 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] +; GFX942-NEXT: s_and_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: buffer_load_dword v7, v4, s[4:7], 0 offen offset:1024 +; GFX942-NEXT: ; implicit-def: $vgpr4 +; GFX942-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB21_1 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: s_mov_b64 exec, s[2:3] +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v9, 16, v5 +; GFX942-NEXT: s_movk_i32 s10, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v10, 0xffff0000, v5 +; GFX942-NEXT: s_mov_b32 s11, 0x7060302 +; GFX942-NEXT: .LBB21_3: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Loop Header: Depth=1 +; GFX942-NEXT: ; Child Loop BB21_4 Depth 2 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v7 +; GFX942-NEXT: v_min_f32_e32 v4, v4, v9 +; GFX942-NEXT: v_bfe_u32 v5, v4, 16, 1 +; GFX942-NEXT: v_add3_u32 v5, v5, v4, s10 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 +; GFX942-NEXT: s_mov_b64 s[8:9], exec +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 +; GFX942-NEXT: v_min_f32_e32 v5, v5, v10 +; GFX942-NEXT: v_bfe_u32 v6, v5, 16, 1 +; GFX942-NEXT: v_add3_u32 v6, v6, v5, s10 +; GFX942-NEXT: v_or_b32_e32 v11, 0x400000, v5 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v11, vcc +; GFX942-NEXT: v_perm_b32 v6, v5, v4, s11 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7] +; GFX942-NEXT: .LBB21_4: ; Parent Loop BB21_3 Depth=1 +; GFX942-NEXT: ; => This Inner Loop Header: Depth=2 +; GFX942-NEXT: v_readfirstlane_b32 s4, v0 +; GFX942-NEXT: v_readfirstlane_b32 s5, v1 +; GFX942-NEXT: v_readfirstlane_b32 s6, v2 +; GFX942-NEXT: v_readfirstlane_b32 s7, v3 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] +; GFX942-NEXT: s_and_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[4:7], 0 offen sc0 +; GFX942-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB21_4 +; GFX942-NEXT: ; %bb.5: ; in Loop: Header=BB21_3 Depth=1 +; GFX942-NEXT: s_mov_b64 exec, s[8:9] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v7, v4 +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB21_3 +; GFX942-NEXT: ; %bb.6: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__waterfall__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -8185,34 +8185,34 @@ define float @buffer_fat_ptr_system_atomic_fmin_ret_f32__offset__amdgpu_no_fine_ ; GFX12-NEXT: global_inv scope:SCOPE_SYS ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: buffer_fat_ptr_system_atomic_fmin_ret_f32__offset__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v0, s16 -; GFX940-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024 -; GFX940-NEXT: s_add_i32 s6, s16, 0x400 -; GFX940-NEXT: s_mov_b64 s[4:5], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v1, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, s6 -; GFX940-NEXT: .LBB22_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_max_f32_e32 v0, v5, v5 -; GFX940-NEXT: v_min_f32_e32 v4, v0, v2 -; GFX940-NEXT: v_mov_b64_e32 v[0:1], v[4:5] -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 -; GFX940-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_cbranch_execnz .LBB22_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: buffer_fat_ptr_system_atomic_fmin_ret_f32__offset__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v0, s16 +; GFX942-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:1024 +; GFX942-NEXT: s_add_i32 s6, s16, 0x400 +; GFX942-NEXT: s_mov_b64 s[4:5], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v1, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, s6 +; GFX942-NEXT: .LBB22_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_max_f32_e32 v0, v5, v5 +; GFX942-NEXT: v_min_f32_e32 v4, v0, v2 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5] +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: buffer_atomic_cmpswap v[0:1], v3, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v0, v5 +; GFX942-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_cbranch_execnz .LBB22_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_system_atomic_fmin_ret_f32__offset__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/build_vector.ll b/llvm/test/CodeGen/AMDGPU/build_vector.ll index 4ab940288e8c8..b79984f41114e 100644 --- a/llvm/test/CodeGen/AMDGPU/build_vector.ll +++ b/llvm/test/CodeGen/AMDGPU/build_vector.ll @@ -3,7 +3,7 @@ ; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga | FileCheck %s --check-prefixes=GFX8,GCN ; RUN: llc < %s -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 | FileCheck %s --check-prefixes=GFX10,GCN ; RUN: llc < %s -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-vopd=0 | FileCheck %s --check-prefixes=GFX11,GCN -; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx940 | FileCheck %s --check-prefixes=GFX940,GCN +; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx942 | FileCheck %s --check-prefixes=GFX942,GCN define amdgpu_kernel void @build_vector2 (ptr addrspace(1) %out) { ; GFX6-LABEL: build_vector2: @@ -48,15 +48,15 @@ define amdgpu_kernel void @build_vector2 (ptr addrspace(1) %out) { ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX11-NEXT: s_endpgm ; -; GFX940-LABEL: build_vector2: -; GFX940: ; %bb.0: ; %entry -; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, 5 -; GFX940-NEXT: v_mov_b32_e32 v1, 6 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: build_vector2: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, 5 +; GFX942-NEXT: v_mov_b32_e32 v1, 6 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_endpgm entry: store <2 x i32> , ptr addrspace(1) %out ret void @@ -113,17 +113,17 @@ define amdgpu_kernel void @build_vector4 (ptr addrspace(1) %out) { ; GFX11-NEXT: global_store_b128 v4, v[0:3], s[0:1] ; GFX11-NEXT: s_endpgm ; -; GFX940-LABEL: build_vector4: -; GFX940: ; %bb.0: ; %entry -; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, 5 -; GFX940-NEXT: v_mov_b32_e32 v1, 6 -; GFX940-NEXT: v_mov_b32_e32 v2, 7 -; GFX940-NEXT: v_mov_b32_e32 v3, 8 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: build_vector4: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, 5 +; GFX942-NEXT: v_mov_b32_e32 v1, 6 +; GFX942-NEXT: v_mov_b32_e32 v2, 7 +; GFX942-NEXT: v_mov_b32_e32 v3, 8 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_endpgm entry: store <4 x i32> , ptr addrspace(1) %out ret void @@ -168,14 +168,14 @@ define amdgpu_kernel void @build_vector_v2i16 (ptr addrspace(1) %out) { ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-NEXT: s_endpgm ; -; GFX940-LABEL: build_vector_v2i16: -; GFX940: ; %bb.0: ; %entry -; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, 0x60005 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: build_vector_v2i16: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, 0x60005 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_endpgm entry: store <2 x i16> , ptr addrspace(1) %out ret void @@ -232,17 +232,17 @@ define amdgpu_kernel void @build_vector_v2i16_trunc (ptr addrspace(1) %out, i32 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-NEXT: s_endpgm ; -; GFX940-LABEL: build_vector_v2i16_trunc: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_load_dword s2, s[4:5], 0x2c -; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_lshr_b32 s2, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s2, s2, 5 -; GFX940-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: build_vector_v2i16_trunc: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_load_dword s2, s[4:5], 0x2c +; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_lshr_b32 s2, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s2, s2, 5 +; GFX942-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_endpgm %srl = lshr i32 %a, 16 %trunc = trunc i32 %srl to i16 %ins.0 = insertelement <2 x i16> undef, i16 %trunc, i32 0 @@ -304,17 +304,17 @@ define amdgpu_kernel void @build_v2i32_from_v4i16_shuffle(ptr addrspace(1) %out, ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX11-NEXT: s_endpgm ; -; GFX940-LABEL: build_v2i32_from_v4i16_shuffle: -; GFX940: ; %bb.0: ; %entry -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_lshl_b32 s3, s3, 16 -; GFX940-NEXT: s_lshl_b32 s2, s2, 16 -; GFX940-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v1, s3 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: build_v2i32_from_v4i16_shuffle: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_lshl_b32 s3, s3, 16 +; GFX942-NEXT: s_lshl_b32 s2, s2, 16 +; GFX942-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v1, s3 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_endpgm entry: %shuf = shufflevector <4 x i16> %in, <4 x i16> zeroinitializer, <2 x i32> %zextended = zext <2 x i16> %shuf to <2 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/copy_phys_vgpr64.mir b/llvm/test/CodeGen/AMDGPU/copy_phys_vgpr64.mir index 7c21b3e085804..d5dfb5dd0848d 100644 --- a/llvm/test/CodeGen/AMDGPU/copy_phys_vgpr64.mir +++ b/llvm/test/CodeGen/AMDGPU/copy_phys_vgpr64.mir @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn -mcpu=gfx908 -run-pass postrapseudos -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX908 %s # RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass postrapseudos -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX90A %s -# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -run-pass postrapseudos -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX940 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -run-pass postrapseudos -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX942 %s # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass postrapseudos -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s # RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass postrapseudos -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s @@ -22,10 +22,10 @@ body: | ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $vgpr0_vgpr1 = V_PK_MOV_B32 8, $vgpr2_vgpr3, 12, $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr2_vgpr3, implicit $exec ; - ; GFX940-LABEL: name: copy_v64_to_v64 - ; GFX940: liveins: $vgpr2_vgpr3 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 killed $vgpr2_vgpr3, implicit $exec, implicit $exec + ; GFX942-LABEL: name: copy_v64_to_v64 + ; GFX942: liveins: $vgpr2_vgpr3 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 killed $vgpr2_vgpr3, implicit $exec, implicit $exec ; ; GFX10-LABEL: name: copy_v64_to_v64 ; GFX10: liveins: $vgpr2_vgpr3 @@ -52,10 +52,10 @@ body: | ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $vgpr0_vgpr1 = V_PK_MOV_B32 8, $sgpr2_sgpr3, 12, $sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit killed $sgpr2_sgpr3, implicit $exec ; - ; GFX940-LABEL: name: copy_s64_to_v64 - ; GFX940: liveins: $sgpr2_sgpr3 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 killed $sgpr2_sgpr3, implicit $exec, implicit $exec + ; GFX942-LABEL: name: copy_s64_to_v64 + ; GFX942: liveins: $sgpr2_sgpr3 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 killed $sgpr2_sgpr3, implicit $exec, implicit $exec ; ; GFX10-LABEL: name: copy_s64_to_v64 ; GFX10: liveins: $sgpr2_sgpr3 @@ -83,11 +83,11 @@ body: | ; GFX90A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $agpr2_agpr3 ; GFX90A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit killed $agpr2_agpr3, implicit $exec ; - ; GFX940-LABEL: name: copy_a64_to_v64 - ; GFX940: liveins: $agpr2_agpr3 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $agpr2_agpr3 - ; GFX940-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit killed $agpr2_agpr3, implicit $exec + ; GFX942-LABEL: name: copy_a64_to_v64 + ; GFX942: liveins: $agpr2_agpr3 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $agpr2_agpr3 + ; GFX942-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit killed $agpr2_agpr3, implicit $exec ; ; GFX10-LABEL: name: copy_a64_to_v64 ; GFX10: liveins: $agpr2_agpr3 @@ -117,11 +117,11 @@ body: | ; GFX90A-NEXT: $vgpr0_vgpr1 = V_PK_MOV_B32 8, $vgpr2_vgpr3, 12, $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr2_vgpr3_vgpr4_vgpr5, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX90A-NEXT: $vgpr2_vgpr3 = V_PK_MOV_B32 8, $vgpr4_vgpr5, 12, $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr2_vgpr3_vgpr4_vgpr5, implicit $exec ; - ; GFX940-LABEL: name: copy_v128_to_v128_fwd - ; GFX940: liveins: $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $vgpr2_vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX940-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 $vgpr4_vgpr5, implicit $exec, implicit $vgpr2_vgpr3_vgpr4_vgpr5, implicit $exec + ; GFX942-LABEL: name: copy_v128_to_v128_fwd + ; GFX942: liveins: $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $vgpr2_vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX942-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 $vgpr4_vgpr5, implicit $exec, implicit $vgpr2_vgpr3_vgpr4_vgpr5, implicit $exec ; ; GFX10-LABEL: name: copy_v128_to_v128_fwd ; GFX10: liveins: $vgpr2_vgpr3_vgpr4_vgpr5 @@ -153,11 +153,11 @@ body: | ; GFX90A-NEXT: $vgpr4_vgpr5 = V_PK_MOV_B32 8, $vgpr2_vgpr3, 12, $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3, implicit-def $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX90A-NEXT: $vgpr2_vgpr3 = V_PK_MOV_B32 8, $vgpr0_vgpr1, 12, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec ; - ; GFX940-LABEL: name: copy_v128_to_v128_back - ; GFX940: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $vgpr2_vgpr3, implicit $exec, implicit-def $vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX940-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 $vgpr0_vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec + ; GFX942-LABEL: name: copy_v128_to_v128_back + ; GFX942: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e32 $vgpr2_vgpr3, implicit $exec, implicit-def $vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX942-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 $vgpr0_vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec ; ; GFX10-LABEL: name: copy_v128_to_v128_back ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 @@ -189,12 +189,12 @@ body: | ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr5, implicit $exec, implicit $vgpr4_vgpr5_vgpr6 ; GFX90A-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr6, implicit $exec, implicit killed $vgpr4_vgpr5_vgpr6, implicit $exec ; - ; GFX940-LABEL: name: copy_v96_to_v96 - ; GFX940: liveins: $vgpr4_vgpr5_vgpr6 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr4_vgpr5_vgpr6 - ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr5, implicit $exec, implicit $vgpr4_vgpr5_vgpr6 - ; GFX940-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr6, implicit $exec, implicit killed $vgpr4_vgpr5_vgpr6, implicit $exec + ; GFX942-LABEL: name: copy_v96_to_v96 + ; GFX942: liveins: $vgpr4_vgpr5_vgpr6 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr4_vgpr5_vgpr6 + ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr5, implicit $exec, implicit $vgpr4_vgpr5_vgpr6 + ; GFX942-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr6, implicit $exec, implicit killed $vgpr4_vgpr5_vgpr6, implicit $exec ; ; GFX10-LABEL: name: copy_v96_to_v96 ; GFX10: liveins: $vgpr4_vgpr5_vgpr6 @@ -222,10 +222,10 @@ body: | ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $vgpr0_vgpr1 = V_PK_MOV_B32 8, $vgpr2_vgpr3, 12, $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr2_vgpr3, implicit $exec ; - ; GFX940-LABEL: name: copy_v64_to_v64_undef_sub0 - ; GFX940: liveins: $vgpr3 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 killed $vgpr2_vgpr3, implicit $exec, implicit $exec + ; GFX942-LABEL: name: copy_v64_to_v64_undef_sub0 + ; GFX942: liveins: $vgpr3 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 killed $vgpr2_vgpr3, implicit $exec, implicit $exec ; ; GFX10-LABEL: name: copy_v64_to_v64_undef_sub0 ; GFX10: liveins: $vgpr3 @@ -252,10 +252,10 @@ body: | ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $vgpr0_vgpr1 = V_PK_MOV_B32 8, $vgpr2_vgpr3, 12, $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr2_vgpr3, implicit $exec ; - ; GFX940-LABEL: name: copy_v64_to_v64_undef_sub1 - ; GFX940: liveins: $vgpr2 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 killed $vgpr2_vgpr3, implicit $exec, implicit $exec + ; GFX942-LABEL: name: copy_v64_to_v64_undef_sub1 + ; GFX942: liveins: $vgpr2 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 killed $vgpr2_vgpr3, implicit $exec, implicit $exec ; ; GFX10-LABEL: name: copy_v64_to_v64_undef_sub1 ; GFX10: liveins: $vgpr2 @@ -285,11 +285,11 @@ body: | ; GFX90A-NEXT: $vgpr0_vgpr1 = V_PK_MOV_B32 8, $sgpr4_sgpr5, 12, $sgpr4_sgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $sgpr4_sgpr5_sgpr6_sgpr7, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX90A-NEXT: $vgpr2_vgpr3 = V_PK_MOV_B32 8, $sgpr6_sgpr7, 12, $sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7 ; - ; GFX940-LABEL: name: copy_s128_to_v128_killed - ; GFX940: liveins: $sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr4_sgpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr4_sgpr5_sgpr6_sgpr7 - ; GFX940-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 $sgpr6_sgpr7, implicit $exec, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX942-LABEL: name: copy_s128_to_v128_killed + ; GFX942: liveins: $sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr4_sgpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX942-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e32 $sgpr6_sgpr7, implicit $exec, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7 ; ; GFX10-LABEL: name: copy_s128_to_v128_killed ; GFX10: liveins: $sgpr4_sgpr5_sgpr6_sgpr7 @@ -319,11 +319,11 @@ body: | ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $vgpr2_vgpr3 ; GFX90A-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit $vgpr2_vgpr3, implicit $exec ; - ; GFX940-LABEL: name: copy_v64_to_v64_unaligned - ; GFX940: liveins: $vgpr2_vgpr3 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $vgpr2_vgpr3 - ; GFX940-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit $vgpr2_vgpr3, implicit $exec + ; GFX942-LABEL: name: copy_v64_to_v64_unaligned + ; GFX942: liveins: $vgpr2_vgpr3 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $vgpr2_vgpr3 + ; GFX942-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit $vgpr2_vgpr3, implicit $exec ; ; GFX10-LABEL: name: copy_v64_to_v64_unaligned ; GFX10: liveins: $vgpr2_vgpr3 @@ -351,11 +351,11 @@ body: | ; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr3_vgpr4 ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr4, implicit $exec, implicit killed $vgpr3_vgpr4, implicit $exec ; - ; GFX940-LABEL: name: copy_v64_unaligned_to_v64 - ; GFX940: liveins: $vgpr3_vgpr4 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr3_vgpr4 - ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr4, implicit $exec, implicit killed $vgpr3_vgpr4, implicit $exec + ; GFX942-LABEL: name: copy_v64_unaligned_to_v64 + ; GFX942: liveins: $vgpr3_vgpr4 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr3_vgpr4 + ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr4, implicit $exec, implicit killed $vgpr3_vgpr4, implicit $exec ; ; GFX10-LABEL: name: copy_v64_unaligned_to_v64 ; GFX10: liveins: $vgpr3_vgpr4 @@ -387,13 +387,13 @@ body: | ; GFX90A-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit $vgpr8_vgpr9_vgpr10_vgpr11 ; GFX90A-NEXT: $vgpr4 = V_MOV_B32_e32 $vgpr11, implicit $exec, implicit killed $vgpr8_vgpr9_vgpr10_vgpr11, implicit $exec ; - ; GFX940-LABEL: name: copy_v128_to_v128_unaligned - ; GFX940: liveins: $vgpr8_vgpr9_vgpr10_vgpr11 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr8_vgpr9_vgpr10_vgpr11 - ; GFX940-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr8_vgpr9_vgpr10_vgpr11 - ; GFX940-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit $vgpr8_vgpr9_vgpr10_vgpr11 - ; GFX940-NEXT: $vgpr4 = V_MOV_B32_e32 $vgpr11, implicit $exec, implicit killed $vgpr8_vgpr9_vgpr10_vgpr11, implicit $exec + ; GFX942-LABEL: name: copy_v128_to_v128_unaligned + ; GFX942: liveins: $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX942-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX942-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX942-NEXT: $vgpr4 = V_MOV_B32_e32 $vgpr11, implicit $exec, implicit killed $vgpr8_vgpr9_vgpr10_vgpr11, implicit $exec ; ; GFX10-LABEL: name: copy_v128_to_v128_unaligned ; GFX10: liveins: $vgpr8_vgpr9_vgpr10_vgpr11 @@ -427,13 +427,13 @@ body: | ; GFX90A-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr7_vgpr8_vgpr9_vgpr10 ; GFX90A-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit killed $vgpr7_vgpr8_vgpr9_vgpr10, implicit $exec ; - ; GFX940-LABEL: name: copy_v128_unaligned_to_v128 - ; GFX940: liveins: $vgpr7_vgpr8_vgpr9_vgpr10 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr7_vgpr8_vgpr9_vgpr10 - ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit $vgpr7_vgpr8_vgpr9_vgpr10 - ; GFX940-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr7_vgpr8_vgpr9_vgpr10 - ; GFX940-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit killed $vgpr7_vgpr8_vgpr9_vgpr10, implicit $exec + ; GFX942-LABEL: name: copy_v128_unaligned_to_v128 + ; GFX942: liveins: $vgpr7_vgpr8_vgpr9_vgpr10 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr7_vgpr8_vgpr9_vgpr10 + ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit $vgpr7_vgpr8_vgpr9_vgpr10 + ; GFX942-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr7_vgpr8_vgpr9_vgpr10 + ; GFX942-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit killed $vgpr7_vgpr8_vgpr9_vgpr10, implicit $exec ; ; GFX10-LABEL: name: copy_v128_unaligned_to_v128 ; GFX10: liveins: $vgpr7_vgpr8_vgpr9_vgpr10 @@ -463,11 +463,11 @@ body: | ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr8_sgpr9 ; GFX90A-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr9, implicit $exec, implicit killed $sgpr8_sgpr9, implicit $exec ; - ; GFX940-LABEL: name: copy_s64_to_v64_unaligned - ; GFX940: liveins: $sgpr8_sgpr9 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr8_sgpr9 - ; GFX940-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr9, implicit $exec, implicit killed $sgpr8_sgpr9, implicit $exec + ; GFX942-LABEL: name: copy_s64_to_v64_unaligned + ; GFX942: liveins: $sgpr8_sgpr9 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr8_sgpr9 + ; GFX942-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr9, implicit $exec, implicit killed $sgpr8_sgpr9, implicit $exec ; ; GFX10-LABEL: name: copy_s64_to_v64_unaligned ; GFX10: liveins: $sgpr8_sgpr9 @@ -499,13 +499,13 @@ body: | ; GFX90A-NEXT: $vgpr3 = V_MOV_B32_e32 $sgpr10, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11 ; GFX90A-NEXT: $vgpr4 = V_MOV_B32_e32 $sgpr11, implicit $exec, implicit killed $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec ; - ; GFX940-LABEL: name: copy_s128_to_v128_unaligned - ; GFX940: liveins: $sgpr8_sgpr9_sgpr10_sgpr11 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4, implicit $sgpr8_sgpr9_sgpr10_sgpr11 - ; GFX940-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr9, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11 - ; GFX940-NEXT: $vgpr3 = V_MOV_B32_e32 $sgpr10, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11 - ; GFX940-NEXT: $vgpr4 = V_MOV_B32_e32 $sgpr11, implicit $exec, implicit killed $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec + ; GFX942-LABEL: name: copy_s128_to_v128_unaligned + ; GFX942: liveins: $sgpr8_sgpr9_sgpr10_sgpr11 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4, implicit $sgpr8_sgpr9_sgpr10_sgpr11 + ; GFX942-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr9, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11 + ; GFX942-NEXT: $vgpr3 = V_MOV_B32_e32 $sgpr10, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11 + ; GFX942-NEXT: $vgpr4 = V_MOV_B32_e32 $sgpr11, implicit $exec, implicit killed $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec ; ; GFX10-LABEL: name: copy_s128_to_v128_unaligned ; GFX10: liveins: $sgpr8_sgpr9_sgpr10_sgpr11 @@ -537,12 +537,12 @@ body: | ; GFX90A-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr8_vgpr9_vgpr10 ; GFX90A-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit killed $vgpr8_vgpr9_vgpr10, implicit $exec ; - ; GFX940-LABEL: name: copy_v96_to_v96_unaligned - ; GFX940: liveins: $vgpr8_vgpr9_vgpr10 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3, implicit $vgpr8_vgpr9_vgpr10 - ; GFX940-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr8_vgpr9_vgpr10 - ; GFX940-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit killed $vgpr8_vgpr9_vgpr10, implicit $exec + ; GFX942-LABEL: name: copy_v96_to_v96_unaligned + ; GFX942: liveins: $vgpr8_vgpr9_vgpr10 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3, implicit $vgpr8_vgpr9_vgpr10 + ; GFX942-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr8_vgpr9_vgpr10 + ; GFX942-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit killed $vgpr8_vgpr9_vgpr10, implicit $exec ; ; GFX10-LABEL: name: copy_v96_to_v96_unaligned ; GFX10: liveins: $vgpr8_vgpr9_vgpr10 @@ -573,12 +573,12 @@ body: | ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit $vgpr7_vgpr8_vgpr9 ; GFX90A-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit killed $vgpr7_vgpr8_vgpr9, implicit $exec ; - ; GFX940-LABEL: name: copy_v96_unaligned_to_v96 - ; GFX940: liveins: $vgpr7_vgpr8_vgpr9 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr7_vgpr8_vgpr9 - ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit $vgpr7_vgpr8_vgpr9 - ; GFX940-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit killed $vgpr7_vgpr8_vgpr9, implicit $exec + ; GFX942-LABEL: name: copy_v96_unaligned_to_v96 + ; GFX942: liveins: $vgpr7_vgpr8_vgpr9 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr7_vgpr8_vgpr9 + ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit $vgpr7_vgpr8_vgpr9 + ; GFX942-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit killed $vgpr7_vgpr8_vgpr9, implicit $exec ; ; GFX10-LABEL: name: copy_v96_unaligned_to_v96 ; GFX10: liveins: $vgpr7_vgpr8_vgpr9 @@ -609,12 +609,12 @@ body: | ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2 ; GFX90A-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2, implicit $exec ; - ; GFX940-LABEL: name: copy_s96_to_v96 - ; GFX940: liveins: $sgpr0_sgpr1_sgpr2 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $sgpr0_sgpr1_sgpr2 - ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2 - ; GFX940-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2, implicit $exec + ; GFX942-LABEL: name: copy_s96_to_v96 + ; GFX942: liveins: $sgpr0_sgpr1_sgpr2 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $sgpr0_sgpr1_sgpr2 + ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2 + ; GFX942-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2, implicit $exec ; ; GFX10-LABEL: name: copy_s96_to_v96 ; GFX10: liveins: $sgpr0_sgpr1_sgpr2 @@ -645,12 +645,12 @@ body: | ; GFX90A-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2 ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2, implicit $exec ; - ; GFX940-LABEL: name: copy_s96_to_v96_unaligned - ; GFX940: liveins: $sgpr0_sgpr1_sgpr2 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $vgpr3 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3, implicit $sgpr0_sgpr1_sgpr2 - ; GFX940-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2 - ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2, implicit $exec + ; GFX942-LABEL: name: copy_s96_to_v96_unaligned + ; GFX942: liveins: $sgpr0_sgpr1_sgpr2 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $vgpr3 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3, implicit $sgpr0_sgpr1_sgpr2 + ; GFX942-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2 + ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2, implicit $exec ; ; GFX10-LABEL: name: copy_s96_to_v96_unaligned ; GFX10: liveins: $sgpr0_sgpr1_sgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll index b64968c9336b9..c954e1fe124ef 100644 --- a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll +++ b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll @@ -71,12 +71,6 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c < %s | FileCheck --check-prefixes=GFX90C %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c -mattr=-xnack < %s | FileCheck --check-prefixes=GFX90C-NOXNACK %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c -mattr=+xnack < %s | FileCheck --check-prefixes=GFX90C-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck --check-prefixes=GFX940 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX940-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX940-XNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx941 < %s | FileCheck --check-prefixes=GFX941 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx941 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX941-NOXNACK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx941 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX941-XNACK %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck --check-prefixes=GFX942 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX942-NOXNACK %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX942-XNACK %s @@ -174,12 +168,6 @@ ; GFX90C: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c" ; GFX90C-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c:xnack-" ; GFX90C-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c:xnack+" -; GFX940: .amdgcn_target "amdgcn-amd-amdhsa--gfx940" -; GFX940-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx940:xnack-" -; GFX940-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx940:xnack+" -; GFX941: .amdgcn_target "amdgcn-amd-amdhsa--gfx941" -; GFX941-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx941:xnack-" -; GFX941-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx941:xnack+" ; GFX942: .amdgcn_target "amdgcn-amd-amdhsa--gfx942" ; GFX942-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx942:xnack-" ; GFX942-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx942:xnack+" diff --git a/llvm/test/CodeGen/AMDGPU/dpp64_combine.ll b/llvm/test/CodeGen/AMDGPU/dpp64_combine.ll index c7422786d344e..34b794705e983 100644 --- a/llvm/test/CodeGen/AMDGPU/dpp64_combine.ll +++ b/llvm/test/CodeGen/AMDGPU/dpp64_combine.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,DPP64,GFX90A -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,DPP64,DPPMOV64,GFX940 +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,DPP64,DPPMOV64,GFX942 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,DPP32,GFX10PLUS ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,DPP32,GFX10PLUS @@ -74,7 +74,7 @@ define amdgpu_kernel void @dpp64_div(ptr addrspace(1) %arg, i64 %in1) { ; DPP64: v_mov_b32_dpp ; GFX90A: v_add_co_u32_e32 ; GFX90A: v_addc_co_u32_e32 -; GFX940: v_lshl_add_u64 +; GFX942: v_lshl_add_u64 ; GFX10PLUS: v_mov_b32_dpp ; GFX10PLUS: v_add_co_u32 ; GFX10PLUS: v_add_co_ci_u32_e32 diff --git a/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir index 9a6a54bbc4e49..84da231c95a62 100644 --- a/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir +++ b/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir @@ -1,5 +1,5 @@ # RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN -# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN +# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN --- # GCN-LABEL: name: dpp64_old_impdef diff --git a/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll b/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll index 99344f16d4cd6..65039b4716941 100644 --- a/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll +++ b/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll @@ -54,8 +54,6 @@ ; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx909 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX909 %s ; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx90a < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX90A %s ; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx90c < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX90C %s -; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx940 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX940 %s -; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx941 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX941 %s ; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx942 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX942 %s ; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx950 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX950 %s ; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1010 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1010 %s @@ -137,8 +135,6 @@ ; GFX909: EF_AMDGPU_MACH_AMDGCN_GFX909 (0x31) ; GFX90A: EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F) ; GFX90C: EF_AMDGPU_MACH_AMDGCN_GFX90C (0x32) -; GFX940: EF_AMDGPU_MACH_AMDGCN_GFX940 (0x40) -; GFX941: EF_AMDGPU_MACH_AMDGCN_GFX941 (0x4B) ; GFX942: EF_AMDGPU_MACH_AMDGCN_GFX942 (0x4C) ; GFX950: EF_AMDGPU_MACH_AMDGCN_GFX950 (0x4F) ; GFX1010: EF_AMDGPU_MACH_AMDGCN_GFX1010 (0x33) diff --git a/llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll b/llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll index 3ad2a9df764be..c4479b3a51d00 100644 --- a/llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll +++ b/llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll @@ -9,8 +9,8 @@ ; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx90a < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX90A %s ; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx90a < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX90A %s -; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx940 < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX940 %s -; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx940 -mattr=+sramecc < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX940 %s +; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx942 < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX942 %s +; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx942 -mattr=+sramecc < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX942 %s ; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx950 < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX950 %s ; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx950 -mattr=+sramecc < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX950 %s @@ -42,10 +42,10 @@ ; SRAM-ECC-GFX90A: EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F) ; SRAM-ECC-GFX90A: ] -; SRAM-ECC-GFX940: Flags [ -; SRAM-ECC-GFX940: EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200) -; SRAM-ECC-GFX940: EF_AMDGPU_MACH_AMDGCN_GFX940 (0x40) -; SRAM-ECC-GFX940: ] +; SRAM-ECC-GFX942: Flags [ +; SRAM-ECC-GFX942: EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200) +; SRAM-ECC-GFX942: EF_AMDGPU_MACH_AMDGCN_GFX942 (0x4C) +; SRAM-ECC-GFX942: ] ; SRAM-ECC-GFX950: Flags [ ; SRAM-ECC-GFX950: EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200) diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir index 001a72e360976..2f8ad7f56478a 100644 --- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir +++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir @@ -5,7 +5,7 @@ # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=MUBUFW64 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=MUBUFW32 %s -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW64 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW64 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW32 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW32 %s diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-scalar-bit-ops.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-scalar-bit-ops.mir index 1456bbc369b6a..aecff1b13171d 100644 --- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-scalar-bit-ops.mir +++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-scalar-bit-ops.mir @@ -5,7 +5,7 @@ # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=MUBUFW64 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=MUBUFW32 %s -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW64 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW64 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW32 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW32 %s diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir index b5a9f02711016..12e8d24cb3675 100644 --- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir +++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir @@ -6,7 +6,7 @@ # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=MUBUFW64,GFX10 %s # FIXME: Test in wave32 -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=FLATSCRW64,GFX940 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=FLATSCRW64,GFX942 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=FLATSCRW64,GFX11 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=FLATSCRW64,GFX12 %s @@ -512,14 +512,14 @@ body: | ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec ; GFX10-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc ; - ; GFX940-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr - ; GFX940: liveins: $sgpr8 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec - ; GFX940-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec - ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec - ; GFX940-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 0, implicit $exec - ; GFX940-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc + ; GFX942-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr + ; GFX942: liveins: $sgpr8 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; GFX942-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec + ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec + ; GFX942-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 0, implicit $exec + ; GFX942-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc ; ; GFX11-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr ; GFX11: liveins: $sgpr8 @@ -596,14 +596,14 @@ body: | ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 1, implicit $exec ; GFX10-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc ; - ; GFX940-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr_clamp - ; GFX940: liveins: $sgpr8 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec - ; GFX940-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec - ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec - ; GFX940-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 1, implicit $exec - ; GFX940-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc + ; GFX942-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr_clamp + ; GFX942: liveins: $sgpr8 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; GFX942-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec + ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec + ; GFX942-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 1, implicit $exec + ; GFX942-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc ; ; GFX11-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr_clamp ; GFX11: liveins: $sgpr8 @@ -681,13 +681,13 @@ body: | ; GFX10-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec ; GFX10-NEXT: SI_RETURN implicit $vgpr0 ; - ; GFX940-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr - ; GFX940: liveins: $vgpr8 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec - ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec - ; GFX940-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 0, implicit $exec - ; GFX940-NEXT: SI_RETURN implicit $vgpr0 + ; GFX942-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr + ; GFX942: liveins: $vgpr8 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec + ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec + ; GFX942-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 0, implicit $exec + ; GFX942-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX11-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr ; GFX11: liveins: $vgpr8 @@ -765,13 +765,13 @@ body: | ; GFX10-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 1, implicit $exec ; GFX10-NEXT: SI_RETURN implicit $vgpr0 ; - ; GFX940-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__clamp - ; GFX940: liveins: $vgpr8 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec - ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec - ; GFX940-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 1, implicit $exec - ; GFX940-NEXT: SI_RETURN implicit $vgpr0 + ; GFX942-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__clamp + ; GFX942: liveins: $vgpr8 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec + ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec + ; GFX942-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 1, implicit $exec + ; GFX942-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX11-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__clamp ; GFX11: liveins: $vgpr8 @@ -849,13 +849,13 @@ body: | ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec ; GFX10-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc ; - ; GFX940-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__live_vcc - ; GFX940: liveins: $vgpr8 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec - ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec - ; GFX940-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 0, implicit $exec - ; GFX940-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc + ; GFX942-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__live_vcc + ; GFX942: liveins: $vgpr8 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec + ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec + ; GFX942-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 0, implicit $exec + ; GFX942-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc ; ; GFX11-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__live_vcc ; GFX11: liveins: $vgpr8 @@ -1019,10 +1019,10 @@ body: | ; GFX10-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 84, 0, 1, implicit $exec ; GFX10-NEXT: SI_RETURN implicit $vgpr0 ; - ; GFX940-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp - ; GFX940: $sgpr4 = S_MOV_B32 72 - ; GFX940-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, killed $sgpr4, 1, implicit $exec - ; GFX940-NEXT: SI_RETURN implicit $vgpr0 + ; GFX942-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp + ; GFX942: $sgpr4 = S_MOV_B32 72 + ; GFX942-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, killed $sgpr4, 1, implicit $exec + ; GFX942-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX11-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp ; GFX11: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 84, 0, 1, implicit $exec @@ -1109,17 +1109,17 @@ body: | ; GFX10-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255 ; GFX10-NEXT: SI_RETURN implicit $vgpr0 ; - ; GFX940-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required - ; GFX940: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) - ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec - ; GFX940-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec - ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec - ; GFX940-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 0, implicit $exec - ; GFX940-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5) - ; GFX940-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255 - ; GFX940-NEXT: SI_RETURN implicit $vgpr0 + ; GFX942-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required + ; GFX942: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) + ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; GFX942-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec + ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec + ; GFX942-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 0, implicit $exec + ; GFX942-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5) + ; GFX942-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255 + ; GFX942-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX11-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required ; GFX11: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 @@ -1714,12 +1714,12 @@ body: | ; MUBUFW64-NEXT: renamable $vgpr0, dead renamable $sgpr4_sgpr5 = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr1, 0, implicit $exec ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0 ; - ; GFX940-LABEL: name: v_add_co_u32_e64__fi_sgpr_func - ; GFX940: liveins: $sgpr4 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec - ; GFX940-NEXT: renamable $vgpr0, dead renamable $sgpr4_sgpr5 = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr1, 0, implicit $exec - ; GFX940-NEXT: SI_RETURN implicit $vgpr0 + ; GFX942-LABEL: name: v_add_co_u32_e64__fi_sgpr_func + ; GFX942: liveins: $sgpr4 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; GFX942-NEXT: renamable $vgpr0, dead renamable $sgpr4_sgpr5 = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr1, 0, implicit $exec + ; GFX942-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX11-LABEL: name: v_add_co_u32_e64__fi_sgpr_func ; GFX11: liveins: $sgpr4 @@ -1902,14 +1902,14 @@ body: | ; GFX10-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed renamable $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX10-NEXT: S_ENDPGM 0 ; - ; GFX940-LABEL: name: v_add_co_u32_e64_fi_sgpr_clobbered_register - ; GFX940: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: renamable $sgpr0 = S_LSHL_B32 renamable $sgpr6, 2, implicit-def dead $scc - ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 32772, implicit $exec - ; GFX940-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $sgpr0, 0, implicit $exec - ; GFX940-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed renamable $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) - ; GFX940-NEXT: S_ENDPGM 0 + ; GFX942-LABEL: name: v_add_co_u32_e64_fi_sgpr_clobbered_register + ; GFX942: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: renamable $sgpr0 = S_LSHL_B32 renamable $sgpr6, 2, implicit-def dead $scc + ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 32772, implicit $exec + ; GFX942-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $sgpr0, 0, implicit $exec + ; GFX942-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed renamable $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) + ; GFX942-NEXT: S_ENDPGM 0 ; ; GFX11-LABEL: name: v_add_co_u32_e64_fi_sgpr_clobbered_register ; GFX11: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C @@ -2006,14 +2006,14 @@ body: | ; GFX10-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed renamable $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX10-NEXT: S_ENDPGM 0 ; - ; GFX940-LABEL: name: v_add_co_u32_e64_sgpr_fi_clobbered_register - ; GFX940: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: renamable $sgpr0 = S_LSHL_B32 renamable $sgpr6, 2, implicit-def dead $scc - ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 32772, implicit $exec - ; GFX940-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $sgpr0, 0, implicit $exec - ; GFX940-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed renamable $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) - ; GFX940-NEXT: S_ENDPGM 0 + ; GFX942-LABEL: name: v_add_co_u32_e64_sgpr_fi_clobbered_register + ; GFX942: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: renamable $sgpr0 = S_LSHL_B32 renamable $sgpr6, 2, implicit-def dead $scc + ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 32772, implicit $exec + ; GFX942-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $sgpr0, 0, implicit $exec + ; GFX942-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed renamable $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) + ; GFX942-NEXT: S_ENDPGM 0 ; ; GFX11-LABEL: name: v_add_co_u32_e64_sgpr_fi_clobbered_register ; GFX11: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C @@ -2137,21 +2137,21 @@ body: | ; GFX10-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX10-NEXT: SI_RETURN implicit $vgpr0 ; - ; GFX940-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc - ; GFX940: liveins: $sgpr4, $sgpr5 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $sgpr4 = frame-setup COPY $sgpr33 - ; GFX940-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc - ; GFX940-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc - ; GFX940-NEXT: $sgpr5 = frame-setup COPY $sgpr34 - ; GFX940-NEXT: $sgpr34 = frame-setup COPY $sgpr32 - ; GFX940-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc - ; GFX940-NEXT: $sgpr6 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc - ; GFX940-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $sgpr6, 0, implicit $exec - ; GFX940-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 - ; GFX940-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 - ; GFX940-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 - ; GFX940-NEXT: SI_RETURN implicit $vgpr0 + ; GFX942-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc + ; GFX942: liveins: $sgpr4, $sgpr5 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX942-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc + ; GFX942-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc + ; GFX942-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX942-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX942-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc + ; GFX942-NEXT: $sgpr6 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc + ; GFX942-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $sgpr6, 0, implicit $exec + ; GFX942-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 + ; GFX942-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX942-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 + ; GFX942-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX11-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc ; GFX11: liveins: $sgpr4, $sgpr5 @@ -2290,21 +2290,21 @@ body: | ; GFX10-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX10-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9 ; - ; GFX940-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live - ; GFX940: liveins: $sgpr4, $sgpr5 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $sgpr4 = frame-setup COPY $sgpr33 - ; GFX940-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc - ; GFX940-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc - ; GFX940-NEXT: $sgpr5 = frame-setup COPY $sgpr34 - ; GFX940-NEXT: $sgpr34 = frame-setup COPY $sgpr32 - ; GFX940-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc - ; GFX940-NEXT: $sgpr6 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc - ; GFX940-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $sgpr6, 0, implicit $exec - ; GFX940-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 - ; GFX940-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 - ; GFX940-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 - ; GFX940-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9 + ; GFX942-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live + ; GFX942: liveins: $sgpr4, $sgpr5 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX942-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc + ; GFX942-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc + ; GFX942-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX942-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX942-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc + ; GFX942-NEXT: $sgpr6 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc + ; GFX942-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $sgpr6, 0, implicit $exec + ; GFX942-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 + ; GFX942-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX942-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 + ; GFX942-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9 ; ; GFX11-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live ; GFX11: liveins: $sgpr4, $sgpr5 @@ -2444,21 +2444,21 @@ body: | ; GFX10-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX10-NEXT: SI_RETURN implicit $vgpr0 ; - ; GFX940-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc - ; GFX940: liveins: $sgpr4, $sgpr5 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $sgpr4 = frame-setup COPY $sgpr33 - ; GFX940-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc - ; GFX940-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc - ; GFX940-NEXT: $sgpr5 = frame-setup COPY $sgpr34 - ; GFX940-NEXT: $sgpr34 = frame-setup COPY $sgpr32 - ; GFX940-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc - ; GFX940-NEXT: $sgpr6 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc - ; GFX940-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 64, killed $sgpr6, 0, implicit $exec - ; GFX940-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 - ; GFX940-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 - ; GFX940-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 - ; GFX940-NEXT: SI_RETURN implicit $vgpr0 + ; GFX942-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc + ; GFX942: liveins: $sgpr4, $sgpr5 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX942-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc + ; GFX942-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc + ; GFX942-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX942-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX942-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc + ; GFX942-NEXT: $sgpr6 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc + ; GFX942-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 64, killed $sgpr6, 0, implicit $exec + ; GFX942-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 + ; GFX942-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX942-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 + ; GFX942-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX11-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc ; GFX11: liveins: $sgpr4, $sgpr5 @@ -2597,21 +2597,21 @@ body: | ; GFX10-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX10-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc ; - ; GFX940-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live - ; GFX940: liveins: $sgpr4, $sgpr5 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: $sgpr4 = frame-setup COPY $sgpr33 - ; GFX940-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc - ; GFX940-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc - ; GFX940-NEXT: $sgpr5 = frame-setup COPY $sgpr34 - ; GFX940-NEXT: $sgpr34 = frame-setup COPY $sgpr32 - ; GFX940-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc - ; GFX940-NEXT: $sgpr6 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc - ; GFX940-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, killed $sgpr6, 0, implicit $exec - ; GFX940-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 - ; GFX940-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 - ; GFX940-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 - ; GFX940-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc + ; GFX942-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live + ; GFX942: liveins: $sgpr4, $sgpr5 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX942-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc + ; GFX942-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc + ; GFX942-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX942-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX942-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc + ; GFX942-NEXT: $sgpr6 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc + ; GFX942-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, killed $sgpr6, 0, implicit $exec + ; GFX942-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 + ; GFX942-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX942-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 + ; GFX942-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc ; ; GFX11-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live ; GFX11: liveins: $sgpr4, $sgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir index b7a5cf963138f..6a4671058dc0e 100644 --- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir +++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=MUBUF %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=MUBUF %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=MUBUFW32 %s -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW64 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW64 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW32 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW32 %s diff --git a/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f32.ll b/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f32.ll index ef180cef7ed2a..997432de65283 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f32.ll @@ -1,21 +1,21 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX940 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX942 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX11 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX940 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX942 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX11 %s define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_intrinsic(ptr %ptr, float %data) { - ; GFX940-LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic - ; GFX940: bb.0 (%ir-block.0): - ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX940-NEXT: FLAT_ATOMIC_ADD_F32 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr) - ; GFX940-NEXT: S_ENDPGM 0 + ; GFX942-LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic + ; GFX942: bb.0 (%ir-block.0): + ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX942-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] + ; GFX942-NEXT: FLAT_ATOMIC_ADD_F32 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr) + ; GFX942-NEXT: S_ENDPGM 0 ; ; GFX11-LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic ; GFX11: bb.0 (%ir-block.0): @@ -33,18 +33,18 @@ define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_intrinsic(ptr %ptr, float %da } define amdgpu_ps float @flat_atomic_fadd_f32_rtn_intrinsic(ptr %ptr, float %data) { - ; GFX940-LABEL: name: flat_atomic_fadd_f32_rtn_intrinsic - ; GFX940: bb.0 (%ir-block.0): - ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX940-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr) - ; GFX940-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]] - ; GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; GFX942-LABEL: name: flat_atomic_fadd_f32_rtn_intrinsic + ; GFX942: bb.0 (%ir-block.0): + ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX942-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] + ; GFX942-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr) + ; GFX942-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]] + ; GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; ; GFX11-LABEL: name: flat_atomic_fadd_f32_rtn_intrinsic ; GFX11: bb.0 (%ir-block.0): @@ -63,17 +63,17 @@ define amdgpu_ps float @flat_atomic_fadd_f32_rtn_intrinsic(ptr %ptr, float %data } define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_atomicrmw(ptr %ptr, float %data) #0 { - ; GFX940-LABEL: name: flat_atomic_fadd_f32_no_rtn_atomicrmw - ; GFX940: bb.0 (%ir-block.0): - ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX940-NEXT: FLAT_ATOMIC_ADD_F32 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) - ; GFX940-NEXT: S_ENDPGM 0 + ; GFX942-LABEL: name: flat_atomic_fadd_f32_no_rtn_atomicrmw + ; GFX942: bb.0 (%ir-block.0): + ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX942-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] + ; GFX942-NEXT: FLAT_ATOMIC_ADD_F32 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) + ; GFX942-NEXT: S_ENDPGM 0 ; ; GFX11-LABEL: name: flat_atomic_fadd_f32_no_rtn_atomicrmw ; GFX11: bb.0 (%ir-block.0): @@ -91,18 +91,18 @@ define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_atomicrmw(ptr %ptr, float %da } define amdgpu_ps float @flat_atomic_fadd_f32_rtn_atomicrmw(ptr %ptr, float %data) #0 { - ; GFX940-LABEL: name: flat_atomic_fadd_f32_rtn_atomicrmw - ; GFX940: bb.0 (%ir-block.0): - ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX940-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) - ; GFX940-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]] - ; GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; GFX942-LABEL: name: flat_atomic_fadd_f32_rtn_atomicrmw + ; GFX942: bb.0 (%ir-block.0): + ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX942-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] + ; GFX942-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) + ; GFX942-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]] + ; GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; ; GFX11-LABEL: name: flat_atomic_fadd_f32_rtn_atomicrmw ; GFX11: bb.0 (%ir-block.0): diff --git a/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll index d64becc74ddc2..36714b386e7e5 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll @@ -1,153 +1,153 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=finalize-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=finalize-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=finalize-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=finalize-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_intrinsic(ptr %ptr, double %data) { - ; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_no_rtn_intrinsic - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; GFX90A_GFX940-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; GFX90A_GFX940-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: FLAT_ATOMIC_ADD_F64 killed [[COPY4]], killed [[COPY5]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s64) on %ir.ptr) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: flat_atomic_fadd_f64_no_rtn_intrinsic + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; GFX90A_GFX942-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; GFX90A_GFX942-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] + ; GFX90A_GFX942-NEXT: FLAT_ATOMIC_ADD_F64 killed [[COPY4]], killed [[COPY5]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s64) on %ir.ptr) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr %ptr, double %data) ret void } define amdgpu_ps double @flat_atomic_fadd_f64_rtn_intrinsic(ptr %ptr, double %data) { - ; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_rtn_intrinsic - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; GFX90A_GFX940-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; GFX90A_GFX940-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s64) on %ir.ptr) - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1 - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY6]] - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY7]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 + ; GFX90A_GFX942-LABEL: name: flat_atomic_fadd_f64_rtn_intrinsic + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; GFX90A_GFX942-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; GFX90A_GFX942-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] + ; GFX90A_GFX942-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s64) on %ir.ptr) + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1 + ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[COPY6]] + ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[COPY7]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr %ptr, double %data) ret double %ret } define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_atomicrmw(ptr %ptr, double %data) #0 { - ; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_no_rtn_atomicrmw - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; GFX90A_GFX940-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; GFX90A_GFX940-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: FLAT_ATOMIC_ADD_F64 killed [[COPY4]], killed [[COPY5]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: flat_atomic_fadd_f64_no_rtn_atomicrmw + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; GFX90A_GFX942-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; GFX90A_GFX942-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] + ; GFX90A_GFX942-NEXT: FLAT_ATOMIC_ADD_F64 killed [[COPY4]], killed [[COPY5]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr %ptr, double %data syncscope("wavefront") monotonic, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 ret void } define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_atomicrmw_noprivate(ptr %ptr, double %data) #0 { - ; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_no_rtn_atomicrmw_noprivate - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; GFX90A_GFX940-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; GFX90A_GFX940-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: FLAT_ATOMIC_ADD_F64 killed [[COPY4]], killed [[COPY5]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: flat_atomic_fadd_f64_no_rtn_atomicrmw_noprivate + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; GFX90A_GFX942-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; GFX90A_GFX942-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] + ; GFX90A_GFX942-NEXT: FLAT_ATOMIC_ADD_F64 killed [[COPY4]], killed [[COPY5]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr %ptr, double %data syncscope("wavefront") monotonic, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 ret void } define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw(ptr %ptr, double %data) #0 { - ; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_rtn_atomicrmw - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; GFX90A_GFX940-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; GFX90A_GFX940-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr) - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1 - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY6]] - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY7]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 + ; GFX90A_GFX942-LABEL: name: flat_atomic_fadd_f64_rtn_atomicrmw + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; GFX90A_GFX942-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; GFX90A_GFX942-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] + ; GFX90A_GFX942-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr) + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1 + ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[COPY6]] + ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[COPY7]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = atomicrmw fadd ptr %ptr, double %data syncscope("wavefront") monotonic, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 ret double %ret } define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw__noprivate(ptr %ptr, double %data) #0 { - ; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_rtn_atomicrmw__noprivate - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; GFX90A_GFX940-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; GFX90A_GFX940-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr) - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1 - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY6]] - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY7]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 + ; GFX90A_GFX942-LABEL: name: flat_atomic_fadd_f64_rtn_atomicrmw__noprivate + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; GFX90A_GFX942-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; GFX90A_GFX942-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] + ; GFX90A_GFX942-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr) + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1 + ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[COPY6]] + ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[COPY7]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = atomicrmw fadd ptr %ptr, double %data syncscope("wavefront") monotonic, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 ret double %ret } diff --git a/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll b/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll index 707cae9534830..02524bf71b074 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s -; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s @@ -26,14 +26,14 @@ define float @flat_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory__amd ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: @@ -202,14 +202,14 @@ define float @flat_agent_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_grai ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: @@ -388,17 +388,17 @@ define float @flat_agent_atomic_fadd_ret_f32__offset12b_neg__amdgpu_no_fine_grai ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: @@ -582,14 +582,14 @@ define void @flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory__am ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: @@ -788,14 +788,14 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_gra ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:2044 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:2044 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: @@ -1005,17 +1005,17 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_gra ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: @@ -1228,14 +1228,14 @@ define float @flat_system_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_gra ; GFX12-NEXT: global_inv scope:SCOPE_SYS ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: @@ -1417,14 +1417,14 @@ define void @flat_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_gr ; GFX12-NEXT: global_inv scope:SCOPE_SYS ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:2044 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:2044 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: @@ -1636,14 +1636,14 @@ define void @flat_agent_atomic_fadd_noret_f32_maybe_remote(ptr %ptr, float %val) ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_noret_f32_maybe_remote: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:2044 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32_maybe_remote: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:2044 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f32_maybe_remote: ; GFX11: ; %bb.0: @@ -1800,14 +1800,14 @@ define void @flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory(pt ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:2044 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:2044 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -1951,14 +1951,14 @@ define void @flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory__a ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:2044 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:2044 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: @@ -2168,14 +2168,14 @@ define void @flat_agent_atomic_fadd_noret_f32_amdgpu_ignore_denormal_mode(ptr %p ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_noret_f32_amdgpu_ignore_denormal_mode: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:2044 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32_amdgpu_ignore_denormal_mode: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:2044 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f32_amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: @@ -2336,14 +2336,14 @@ define float @flat_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_fine_grained_memory ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -2512,14 +2512,14 @@ define float @flat_agent_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fine ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -2698,17 +2698,17 @@ define float @flat_agent_atomic_fadd_ret_f32__offset12b_neg__ftz__amdgpu_no_fine ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -2892,14 +2892,14 @@ define void @flat_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memor ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -3098,14 +3098,14 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fin ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:2044 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:2044 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -3315,17 +3315,17 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_fin ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -3538,14 +3538,14 @@ define float @flat_system_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fin ; GFX12-NEXT: global_inv scope:SCOPE_SYS ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -3727,14 +3727,14 @@ define void @flat_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fi ; GFX12-NEXT: global_inv scope:SCOPE_SYS ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:2044 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:2044 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -3947,14 +3947,14 @@ define float @flat_agent_atomic_fadd_ret_f32__ieee__amdgpu_no_fine_grained_memor ; GFX12-NEXT: global_inv scope:SCOPE_SYS ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_ret_f32__ieee__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_ret_f32__ieee__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_ret_f32__ieee__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: @@ -4136,14 +4136,14 @@ define void @flat_agent_atomic_fadd_noret_f32__ieee__amdgpu_no_fine_grained_memo ; GFX12-NEXT: global_inv scope:SCOPE_SYS ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_noret_f32__ieee__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:2044 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32__ieee__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2 offset:2044 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f32__ieee__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: @@ -4355,14 +4355,14 @@ define float @flat_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory__amdgpu_ig ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: @@ -4519,14 +4519,14 @@ define void @flat_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory__amdgpu_i ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: @@ -4676,14 +4676,14 @@ define float @flat_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory(ptr %ptr, ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -4840,14 +4840,14 @@ define void @flat_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory(ptr %ptr, ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -4997,14 +4997,14 @@ define float @flat_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory_amdg ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory_amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory_amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory_amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: @@ -5173,14 +5173,14 @@ define void @flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory_amd ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory_amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory_amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory_amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: @@ -5379,14 +5379,14 @@ define float @flat_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory_amdg ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory_amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory_amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory_amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -5527,14 +5527,14 @@ define void @flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory_amd ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory_amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory_amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory_amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -5717,43 +5717,43 @@ define double @flat_agent_atomic_fadd_ret_f64__amdgpu_no_fine_grained_memory(ptr ; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_ret_f64__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: s_mov_b64 s[0:1], src_private_base -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5 -; GFX940-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX940-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB30_3 -; GFX940-NEXT: ; %bb.1: ; %Flow -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB30_4 -; GFX940-NEXT: .LBB30_2: ; %atomicrmw.phi -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] -; GFX940-NEXT: .LBB30_3: ; %atomicrmw.global -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[4:5], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: ; implicit-def: $vgpr4_vgpr5 -; GFX940-NEXT: ; implicit-def: $vgpr2_vgpr3 -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execz .LBB30_2 -; GFX940-NEXT: .LBB30_4: ; %atomicrmw.private -; GFX940-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v4, -1, v4, vcc -; GFX940-NEXT: scratch_load_dwordx2 v[0:1], v4, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_add_f64 v[2:3], v[0:1], v[2:3] -; GFX940-NEXT: scratch_store_dwordx2 v4, v[2:3], off sc0 sc1 -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_ret_f64__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5 +; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX942-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB30_3 +; GFX942-NEXT: ; %bb.1: ; %Flow +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB30_4 +; GFX942-NEXT: .LBB30_2: ; %atomicrmw.phi +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] +; GFX942-NEXT: .LBB30_3: ; %atomicrmw.global +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_add_f64 v[0:1], v[4:5], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5 +; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execz .LBB30_2 +; GFX942-NEXT: .LBB30_4: ; %atomicrmw.private +; GFX942-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v4, -1, v4, vcc +; GFX942-NEXT: scratch_load_dwordx2 v[0:1], v4, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_add_f64 v[2:3], v[0:1], v[2:3] +; GFX942-NEXT: scratch_store_dwordx2 v4, v[2:3], off +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_ret_f64__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -6115,43 +6115,43 @@ define double @flat_agent_atomic_fadd_ret_f64__offset12b_pos__amdgpu_no_fine_gra ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7f8 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], src_private_base -; GFX940-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5 -; GFX940-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX940-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB31_3 -; GFX940-NEXT: ; %bb.1: ; %Flow -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB31_4 -; GFX940-NEXT: .LBB31_2: ; %atomicrmw.phi -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] -; GFX940-NEXT: .LBB31_3: ; %atomicrmw.global -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[4:5], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: ; implicit-def: $vgpr4_vgpr5 -; GFX940-NEXT: ; implicit-def: $vgpr2_vgpr3 -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execz .LBB31_2 -; GFX940-NEXT: .LBB31_4: ; %atomicrmw.private -; GFX940-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v4, -1, v4, vcc -; GFX940-NEXT: scratch_load_dwordx2 v[0:1], v4, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_add_f64 v[2:3], v[0:1], v[2:3] -; GFX940-NEXT: scratch_store_dwordx2 v4, v[2:3], off sc0 sc1 -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7f8 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base +; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5 +; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX942-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB31_3 +; GFX942-NEXT: ; %bb.1: ; %Flow +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB31_4 +; GFX942-NEXT: .LBB31_2: ; %atomicrmw.phi +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] +; GFX942-NEXT: .LBB31_3: ; %atomicrmw.global +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_add_f64 v[0:1], v[4:5], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5 +; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execz .LBB31_2 +; GFX942-NEXT: .LBB31_4: ; %atomicrmw.private +; GFX942-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v4, -1, v4, vcc +; GFX942-NEXT: scratch_load_dwordx2 v[0:1], v4, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_add_f64 v[2:3], v[0:1], v[2:3] +; GFX942-NEXT: scratch_store_dwordx2 v4, v[2:3], off +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -6536,44 +6536,44 @@ define double @flat_agent_atomic_fadd_ret_f64__offset12b_neg__amdgpu_no_fine_gra ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_ret_f64__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], src_private_base -; GFX940-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5 -; GFX940-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX940-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB32_3 -; GFX940-NEXT: ; %bb.1: ; %Flow -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB32_4 -; GFX940-NEXT: .LBB32_2: ; %atomicrmw.phi -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] -; GFX940-NEXT: .LBB32_3: ; %atomicrmw.global -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[4:5], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: ; implicit-def: $vgpr4_vgpr5 -; GFX940-NEXT: ; implicit-def: $vgpr2_vgpr3 -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execz .LBB32_2 -; GFX940-NEXT: .LBB32_4: ; %atomicrmw.private -; GFX940-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v4, -1, v4, vcc -; GFX940-NEXT: scratch_load_dwordx2 v[0:1], v4, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_add_f64 v[2:3], v[0:1], v[2:3] -; GFX940-NEXT: scratch_store_dwordx2 v4, v[2:3], off sc0 sc1 -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_ret_f64__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base +; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5 +; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX942-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB32_3 +; GFX942-NEXT: ; %bb.1: ; %Flow +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB32_4 +; GFX942-NEXT: .LBB32_2: ; %atomicrmw.phi +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] +; GFX942-NEXT: .LBB32_3: ; %atomicrmw.global +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_add_f64 v[0:1], v[4:5], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5 +; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execz .LBB32_2 +; GFX942-NEXT: .LBB32_4: ; %atomicrmw.private +; GFX942-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v4, -1, v4, vcc +; GFX942-NEXT: scratch_load_dwordx2 v[0:1], v4, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_add_f64 v[2:3], v[0:1], v[2:3] +; GFX942-NEXT: scratch_store_dwordx2 v4, v[2:3], off +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_ret_f64__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -6951,40 +6951,40 @@ define void @flat_agent_atomic_fadd_noret_f64__amdgpu_no_fine_grained_memory(ptr ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_noret_f64__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], src_private_base -; GFX940-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1 -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX940-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB33_3 -; GFX940-NEXT: ; %bb.1: ; %Flow -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB33_4 -; GFX940-NEXT: .LBB33_2: ; %atomicrmw.phi -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] -; GFX940-NEXT: .LBB33_3: ; %atomicrmw.global -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GFX940-NEXT: ; implicit-def: $vgpr2_vgpr3 -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execz .LBB33_2 -; GFX940-NEXT: .LBB33_4: ; %atomicrmw.private -; GFX940-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc -; GFX940-NEXT: scratch_load_dwordx2 v[0:1], v4, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] -; GFX940-NEXT: scratch_store_dwordx2 v4, v[0:1], off sc0 sc1 -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_noret_f64__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base +; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1 +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX942-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB33_3 +; GFX942-NEXT: ; %bb.1: ; %Flow +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB33_4 +; GFX942-NEXT: .LBB33_2: ; %atomicrmw.phi +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] +; GFX942-NEXT: .LBB33_3: ; %atomicrmw.global +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execz .LBB33_2 +; GFX942-NEXT: .LBB33_4: ; %atomicrmw.private +; GFX942-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc +; GFX942-NEXT: scratch_load_dwordx2 v[0:1], v4, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] +; GFX942-NEXT: scratch_store_dwordx2 v4, v[0:1], off +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f64__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -7346,42 +7346,42 @@ define void @flat_agent_atomic_fadd_noret_f64__offset12b_pos__amdgpu_no_fine_gra ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_noret_f64__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7f8 -; GFX940-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], src_private_base -; GFX940-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1 -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX940-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB34_3 -; GFX940-NEXT: ; %bb.1: ; %Flow -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB34_4 -; GFX940-NEXT: .LBB34_2: ; %atomicrmw.phi -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] -; GFX940-NEXT: .LBB34_3: ; %atomicrmw.global -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GFX940-NEXT: ; implicit-def: $vgpr2_vgpr3 -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execz .LBB34_2 -; GFX940-NEXT: .LBB34_4: ; %atomicrmw.private -; GFX940-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc -; GFX940-NEXT: scratch_load_dwordx2 v[0:1], v4, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] -; GFX940-NEXT: scratch_store_dwordx2 v4, v[0:1], off sc0 sc1 -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_noret_f64__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7f8 +; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base +; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1 +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX942-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB34_3 +; GFX942-NEXT: ; %bb.1: ; %Flow +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB34_4 +; GFX942-NEXT: .LBB34_2: ; %atomicrmw.phi +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] +; GFX942-NEXT: .LBB34_3: ; %atomicrmw.global +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execz .LBB34_2 +; GFX942-NEXT: .LBB34_4: ; %atomicrmw.private +; GFX942-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc +; GFX942-NEXT: scratch_load_dwordx2 v[0:1], v4, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] +; GFX942-NEXT: scratch_store_dwordx2 v4, v[0:1], off +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f64__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -7757,43 +7757,43 @@ define void @flat_agent_atomic_fadd_noret_f64__offset12b_neg__amdgpu_no_fine_gra ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_noret_f64__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], src_private_base -; GFX940-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1 -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX940-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB35_3 -; GFX940-NEXT: ; %bb.1: ; %Flow -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB35_4 -; GFX940-NEXT: .LBB35_2: ; %atomicrmw.phi -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] -; GFX940-NEXT: .LBB35_3: ; %atomicrmw.global -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GFX940-NEXT: ; implicit-def: $vgpr2_vgpr3 -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execz .LBB35_2 -; GFX940-NEXT: .LBB35_4: ; %atomicrmw.private -; GFX940-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc -; GFX940-NEXT: scratch_load_dwordx2 v[0:1], v4, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] -; GFX940-NEXT: scratch_store_dwordx2 v4, v[0:1], off sc0 sc1 -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_noret_f64__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base +; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1 +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX942-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB35_3 +; GFX942-NEXT: ; %bb.1: ; %Flow +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB35_4 +; GFX942-NEXT: .LBB35_2: ; %atomicrmw.phi +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] +; GFX942-NEXT: .LBB35_3: ; %atomicrmw.global +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execz .LBB35_2 +; GFX942-NEXT: .LBB35_4: ; %atomicrmw.private +; GFX942-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc +; GFX942-NEXT: scratch_load_dwordx2 v[0:1], v4, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] +; GFX942-NEXT: scratch_store_dwordx2 v4, v[0:1], off +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f64__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -8155,38 +8155,38 @@ define half @flat_agent_atomic_fadd_ret_f16__amdgpu_no_fine_grained_memory(ptr % ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_ret_f16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v3 -; GFX940-NEXT: flat_load_dword v4, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB36_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v4 -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v3, v7 -; GFX940-NEXT: v_add_f16_e32 v4, v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v3, v4 -; GFX940-NEXT: v_and_or_b32 v6, v7, v5, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v4, v[0:1], v[6:7] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB36_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_ret_f16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v3 +; GFX942-NEXT: flat_load_dword v4, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB36_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v4 +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v3, v7 +; GFX942-NEXT: v_add_f16_e32 v4, v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4 +; GFX942-NEXT: v_and_or_b32 v6, v7, v5, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[6:7] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB36_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_ret_f16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -8445,40 +8445,40 @@ define half @flat_agent_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grain ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: flat_load_dword v4, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v6 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB37_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v4 -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v3, v7 -; GFX940-NEXT: v_add_f16_e32 v4, v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v3, v4 -; GFX940-NEXT: v_and_or_b32 v6, v7, v5, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v4, v[0:1], v[6:7] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB37_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: flat_load_dword v4, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v6 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB37_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v4 +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v3, v7 +; GFX942-NEXT: v_add_f16_e32 v4, v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4 +; GFX942-NEXT: v_and_or_b32 v6, v7, v5, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[6:7] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB37_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -8744,41 +8744,41 @@ define half @flat_agent_atomic_fadd_ret_f16__offset12b_neg__amdgpu_no_fine_grain ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: flat_load_dword v4, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v6 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB38_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v4 -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v3, v7 -; GFX940-NEXT: v_add_f16_e32 v4, v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v3, v4 -; GFX940-NEXT: v_and_or_b32 v6, v7, v5, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v4, v[0:1], v[6:7] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB38_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: flat_load_dword v4, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v6 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB38_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v4 +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v3, v7 +; GFX942-NEXT: v_add_f16_e32 v4, v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4 +; GFX942-NEXT: v_and_or_b32 v6, v7, v5, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[6:7] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB38_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -9040,37 +9040,37 @@ define void @flat_agent_atomic_fadd_noret_f16__amdgpu_no_fine_grained_memory(ptr ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_noret_f16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v3 -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v6, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB39_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v3, v5 -; GFX940-NEXT: v_add_f16_e32 v4, v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v3, v4 -; GFX940-NEXT: v_and_or_b32 v4, v5, v6, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB39_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_noret_f16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v3 +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v6, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB39_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v3, v5 +; GFX942-NEXT: v_add_f16_e32 v4, v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4 +; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB39_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -9319,39 +9319,39 @@ define void @flat_agent_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_gra ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v6, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB40_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v3, v5 -; GFX940-NEXT: v_add_f16_e32 v4, v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v3, v4 -; GFX940-NEXT: v_and_or_b32 v4, v5, v6, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB40_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v6, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB40_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v3, v5 +; GFX942-NEXT: v_add_f16_e32 v4, v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4 +; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB40_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -9607,40 +9607,40 @@ define void @flat_agent_atomic_fadd_noret_f16__offset12b_neg__amdgpu_no_fine_gra ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v6, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB41_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v3, v5 -; GFX940-NEXT: v_add_f16_e32 v4, v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v3, v4 -; GFX940-NEXT: v_and_or_b32 v4, v5, v6, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB41_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v6, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB41_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v3, v5 +; GFX942-NEXT: v_add_f16_e32 v4, v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4 +; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB41_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -9883,29 +9883,29 @@ define void @flat_agent_atomic_fadd_noret_f16__offset12b__align4_pos__amdgpu_no_ ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v5, v[0:1] offset:2046 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_mov_b32 s2, 0xffff0000 -; GFX940-NEXT: .LBB42_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_f16_e32 v3, v5, v2 -; GFX940-NEXT: v_and_or_b32 v4, v5, s2, v3 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB42_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v5, v[0:1] offset:2046 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_mov_b32 s2, 0xffff0000 +; GFX942-NEXT: .LBB42_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_f16_e32 v3, v5, v2 +; GFX942-NEXT: v_and_or_b32 v4, v5, s2, v3 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB42_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -10099,30 +10099,30 @@ define half @flat_agent_atomic_fadd_ret_f16__offset12b_pos__align4__amdgpu_no_fi ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2046 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_mov_b32 s2, 0xffff0000 -; GFX940-NEXT: .LBB43_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_add_f16_e32 v3, v5, v2 -; GFX940-NEXT: v_and_or_b32 v4, v5, s2, v3 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB43_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2046 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_mov_b32 s2, 0xffff0000 +; GFX942-NEXT: .LBB43_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_add_f16_e32 v3, v5, v2 +; GFX942-NEXT: v_and_or_b32 v4, v5, s2, v3 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB43_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -10335,40 +10335,40 @@ define half @flat_system_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grai ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: flat_load_dword v4, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v6 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB44_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v4 -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v3, v7 -; GFX940-NEXT: v_add_f16_e32 v4, v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v3, v4 -; GFX940-NEXT: v_and_or_b32 v6, v7, v5, v4 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v4, v[0:1], v[6:7] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB44_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: flat_load_dword v4, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v6 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB44_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v4 +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v3, v7 +; GFX942-NEXT: v_add_f16_e32 v4, v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4 +; GFX942-NEXT: v_and_or_b32 v6, v7, v5, v4 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[6:7] sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB44_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -10635,39 +10635,39 @@ define void @flat_system_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_gr ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v6, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB45_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v3, v5 -; GFX940-NEXT: v_add_f16_e32 v4, v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v3, v4 -; GFX940-NEXT: v_and_or_b32 v4, v5, v6, v4 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB45_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v6, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB45_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v3, v5 +; GFX942-NEXT: v_add_f16_e32 v4, v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4 +; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB45_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -10938,47 +10938,47 @@ define bfloat @flat_agent_atomic_fadd_ret_bf16__amdgpu_no_fine_grained_memory(pt ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_ret_bf16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v3 -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB46_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_add_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v5, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB46_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_ret_bf16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v3 +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB46_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_add_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v5, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB46_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_ret_bf16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -11283,49 +11283,49 @@ define bfloat @flat_agent_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_gr ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB47_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_add_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v5, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB47_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB47_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_add_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v5, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB47_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -11638,50 +11638,50 @@ define bfloat @flat_agent_atomic_fadd_ret_bf16__offset12b_neg__amdgpu_no_fine_gr ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB48_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_add_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v5, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB48_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB48_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_add_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v5, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB48_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -11992,48 +11992,48 @@ define void @flat_agent_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_gr ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v4, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v6, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB49_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_add_f32_e32 v2, v2, v6 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v2, v3, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB49_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v4, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v6, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB49_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_add_f32_e32 v2, v2, v6 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB49_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -12336,49 +12336,49 @@ define void @flat_agent_atomic_fadd_noret_bf16__offset12b_neg__amdgpu_no_fine_gr ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v4, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v6, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB50_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_add_f32_e32 v2, v2, v6 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v2, v3, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB50_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v4, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v6, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB50_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_add_f32_e32 v2, v2, v6 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB50_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -12670,40 +12670,40 @@ define bfloat @flat_agent_atomic_fadd_ret_bf16__offset12b_pos__align4__amdgpu_no ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2046 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: s_mov_b32 s3, 0xffff0000 -; GFX940-NEXT: .LBB51_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v5 -; GFX940-NEXT: v_add_f32_e32 v3, v3, v2 -; GFX940-NEXT: v_bfe_u32 v4, v3, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v3 -; GFX940-NEXT: v_add3_u32 v4, v4, v3, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v6, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3 -; GFX940-NEXT: v_and_or_b32 v4, v5, s3, v3 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB51_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2046 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: s_mov_b32 s3, 0xffff0000 +; GFX942-NEXT: .LBB51_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v5 +; GFX942-NEXT: v_add_f32_e32 v3, v3, v2 +; GFX942-NEXT: v_bfe_u32 v4, v3, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v3 +; GFX942-NEXT: v_add3_u32 v4, v4, v3, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v4, v6, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX942-NEXT: v_and_or_b32 v4, v5, s3, v3 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB51_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -12953,39 +12953,39 @@ define void @flat_agent_atomic_fadd_noret_bf16__offset12b__align4_pos__amdgpu_no ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2046 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: s_mov_b32 s3, 0xffff0000 -; GFX940-NEXT: .LBB52_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v3 -; GFX940-NEXT: v_add_f32_e32 v2, v2, v4 -; GFX940-NEXT: v_bfe_u32 v5, v2, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v2 -; GFX940-NEXT: v_add3_u32 v5, v5, v2, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_and_or_b32 v2, v3, s3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2046 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB52_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2046 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: s_mov_b32 s3, 0xffff0000 +; GFX942-NEXT: .LBB52_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; GFX942-NEXT: v_add_f32_e32 v2, v2, v4 +; GFX942-NEXT: v_bfe_u32 v5, v2, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v2 +; GFX942-NEXT: v_add3_u32 v5, v5, v2, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_and_or_b32 v2, v3, s3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2046 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB52_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -13240,46 +13240,46 @@ define void @flat_agent_atomic_fadd_noret_bf16__amdgpu_no_fine_grained_memory(pt ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_noret_bf16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v3 -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v6, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB53_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v4, v3, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_add_f32_e32 v4, v4, v2 -; GFX940-NEXT: v_bfe_u32 v7, v4, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v4 -; GFX940-NEXT: v_add3_u32 v7, v7, v4, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v4, v7, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v4, v5, v6, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB53_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_noret_bf16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v3 +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v6, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB53_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v4, v3, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_add_f32_e32 v4, v4, v2 +; GFX942-NEXT: v_bfe_u32 v7, v4, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v4 +; GFX942-NEXT: v_add3_u32 v7, v7, v4, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v4, v7, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB53_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_noret_bf16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -13577,49 +13577,49 @@ define bfloat @flat_system_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_g ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB54_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_add_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v5, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB54_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB54_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_add_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v5, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB54_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -13933,48 +13933,48 @@ define void @flat_system_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_g ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v4, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v6, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB55_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_add_f32_e32 v2, v2, v6 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v2, v3, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB55_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v4, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v6, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB55_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_add_f32_e32 v2, v2, v6 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB55_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -14242,14 +14242,14 @@ define <2 x half> @flat_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_memo ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -14431,14 +14431,14 @@ define <2 x half> @flat_agent_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_fi ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -14623,17 +14623,17 @@ define <2 x half> @flat_agent_atomic_fadd_ret_v2f16__offset12b_neg__amdgpu_no_fi ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_ret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_ret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_ret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -14829,14 +14829,14 @@ define void @flat_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory(p ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -15010,14 +15010,14 @@ define void @flat_agent_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_g ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 offset:2044 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 offset:2044 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -15198,17 +15198,17 @@ define void @flat_agent_atomic_fadd_noret_v2f16__offset12b_neg__amdgpu_no_fine_g ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_noret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_noret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_noret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -15402,14 +15402,14 @@ define <2 x half> @flat_system_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_f ; GFX12-NEXT: global_inv scope:SCOPE_SYS ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -15597,14 +15597,14 @@ define void @flat_system_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_ ; GFX12-NEXT: global_inv scope:SCOPE_SYS ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 offset:2044 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 offset:2044 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -15787,14 +15787,14 @@ define <2 x half> @flat_agent_atomic_fadd_ret_v2f16__amdgpu_no_remote_memory(ptr ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_ret_v2f16__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_ret_v2f16__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_ret_v2f16__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -15976,14 +15976,14 @@ define void @flat_agent_atomic_fadd_noret_v2f16__amdgpu_no_remote_memory(ptr %pt ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_noret_v2f16__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_noret_v2f16__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_noret_v2f16__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -16157,14 +16157,14 @@ define <2 x half> @flat_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_memo ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -16346,14 +16346,14 @@ define void @flat_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory__ ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -16531,14 +16531,14 @@ define <2 x bfloat> @flat_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained_m ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -16806,14 +16806,14 @@ define <2 x bfloat> @flat_agent_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_no ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -17084,17 +17084,17 @@ define <2 x bfloat> @flat_agent_atomic_fadd_ret_v2bf16__offset12b_neg__amdgpu_no ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -17376,14 +17376,14 @@ define void @flat_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memory( ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -17643,14 +17643,14 @@ define void @flat_agent_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine_ ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 offset:2044 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 offset:2044 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -17917,17 +17917,17 @@ define void @flat_agent_atomic_fadd_noret_v2bf16__offset12b_neg__amdgpu_no_fine_ ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -18207,14 +18207,14 @@ define <2 x bfloat> @flat_system_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_n ; GFX12-NEXT: global_inv scope:SCOPE_SYS ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -18488,14 +18488,14 @@ define void @flat_system_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine ; GFX12-NEXT: global_inv scope:SCOPE_SYS ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 offset:2044 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 offset:2044 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -18764,14 +18764,14 @@ define <2 x bfloat> @flat_agent_atomic_fadd_ret_v2bf16__amdgpu_no_remote_memory( ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_ret_v2bf16__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_ret_v2bf16__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_ret_v2bf16__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -19039,14 +19039,14 @@ define void @flat_agent_atomic_fadd_noret_v2bf16__amdgpu_no_remote_memory(ptr %p ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_noret_v2bf16__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_noret_v2bf16__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_noret_v2bf16__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -19306,14 +19306,14 @@ define <2 x bfloat> @flat_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained_m ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -19581,14 +19581,14 @@ define void @flat_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memory_ ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmax.ll b/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmax.ll index 5aa9be627594d..6ead5b93a0e39 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmax.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmax.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s -; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s @@ -26,30 +26,30 @@ define float @flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory(ptr ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB0_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_max_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB0_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB0_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_max_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB0_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -168,30 +168,30 @@ define float @flat_agent_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_grai ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB1_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_max_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB1_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB1_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_max_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB1_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -316,37 +316,37 @@ define float @flat_agent_atomic_fmax_ret_f32__offset12b_neg__amdgpu_no_fine_grai ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v4 -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v5, vcc -; GFX940-NEXT: flat_load_dword v0, v[0:1] -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v1, v2, v2 -; GFX940-NEXT: .LBB2_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_max_f32_e32 v0, v3, v3 -; GFX940-NEXT: v_max_f32_e32 v2, v0, v1 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v0, v[4:5], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB2_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v4 +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v5, vcc +; GFX942-NEXT: flat_load_dword v0, v[0:1] +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v1, v2, v2 +; GFX942-NEXT: .LBB2_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_max_f32_e32 v0, v3, v3 +; GFX942-NEXT: v_max_f32_e32 v2, v0, v1 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v0, v[4:5], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB2_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -479,29 +479,29 @@ define void @flat_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory(ptr ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v4, v2, v2 -; GFX940-NEXT: .LBB3_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB3_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v4, v2, v2 +; GFX942-NEXT: .LBB3_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB3_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -619,29 +619,29 @@ define void @flat_agent_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_gra ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v4, v2, v2 -; GFX940-NEXT: .LBB4_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB4_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v4, v2, v2 +; GFX942-NEXT: .LBB4_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB4_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -766,35 +766,35 @@ define void @flat_agent_atomic_fmax_noret_f32__offset12b_neg__amdgpu_no_fine_gra ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_co_u32_e32 v4, vcc, 0xfffff800, v0 -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc -; GFX940-NEXT: flat_load_dword v3, v[4:5] -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v4, v2, v2 -; GFX940-NEXT: .LBB5_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB5_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_co_u32_e32 v4, vcc, 0xfffff800, v0 +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc +; GFX942-NEXT: flat_load_dword v3, v[4:5] +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v4, v2, v2 +; GFX942-NEXT: .LBB5_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB5_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -930,30 +930,30 @@ define float @flat_system_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_gra ; GFX12-NEXT: global_inv scope:SCOPE_SYS ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB6_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_max_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB6_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB6_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_max_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB6_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -1081,29 +1081,29 @@ define void @flat_system_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_gr ; GFX12-NEXT: global_inv scope:SCOPE_SYS ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v4, v2, v2 -; GFX940-NEXT: .LBB7_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB7_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v4, v2, v2 +; GFX942-NEXT: .LBB7_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB7_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -1230,30 +1230,30 @@ define float @flat_agent_atomic_fmax_ret_f32__amdgpu_no_remote_memory(ptr %ptr, ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_ret_f32__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB8_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_max_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB8_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_ret_f32__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB8_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_max_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB8_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_ret_f32__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -1422,30 +1422,30 @@ define float @flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory__amd ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB9_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_max_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB9_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB9_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_max_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB9_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -1568,30 +1568,30 @@ define float @flat_agent_atomic_fmax_ret_f32__ftz__amdgpu_no_fine_grained_memory ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_ret_f32__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB10_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_max_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB10_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_ret_f32__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB10_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_max_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB10_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_ret_f32__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -1710,30 +1710,30 @@ define float @flat_agent_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fine ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB11_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_max_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB11_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB11_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_max_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB11_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -1858,37 +1858,37 @@ define float @flat_agent_atomic_fmax_ret_f32__offset12b_neg__ftz__amdgpu_no_fine ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v4 -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v5, vcc -; GFX940-NEXT: flat_load_dword v0, v[0:1] -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v1, v2, v2 -; GFX940-NEXT: .LBB12_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_max_f32_e32 v0, v3, v3 -; GFX940-NEXT: v_max_f32_e32 v2, v0, v1 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v0, v[4:5], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB12_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v4 +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v5, vcc +; GFX942-NEXT: flat_load_dword v0, v[0:1] +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v1, v2, v2 +; GFX942-NEXT: .LBB12_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_max_f32_e32 v0, v3, v3 +; GFX942-NEXT: v_max_f32_e32 v2, v0, v1 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v0, v[4:5], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB12_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -2021,29 +2021,29 @@ define void @flat_agent_atomic_fmax_noret_f32__ftz__amdgpu_no_fine_grained_memor ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_noret_f32__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v4, v2, v2 -; GFX940-NEXT: .LBB13_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB13_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_noret_f32__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v4, v2, v2 +; GFX942-NEXT: .LBB13_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB13_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_noret_f32__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -2161,29 +2161,29 @@ define void @flat_agent_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fin ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v4, v2, v2 -; GFX940-NEXT: .LBB14_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB14_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v4, v2, v2 +; GFX942-NEXT: .LBB14_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB14_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -2308,35 +2308,35 @@ define void @flat_agent_atomic_fmax_noret_f32__offset12b_neg__ftz__amdgpu_no_fin ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_co_u32_e32 v4, vcc, 0xfffff800, v0 -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc -; GFX940-NEXT: flat_load_dword v3, v[4:5] -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v4, v2, v2 -; GFX940-NEXT: .LBB15_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB15_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_co_u32_e32 v4, vcc, 0xfffff800, v0 +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc +; GFX942-NEXT: flat_load_dword v3, v[4:5] +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v4, v2, v2 +; GFX942-NEXT: .LBB15_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB15_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -2472,30 +2472,30 @@ define float @flat_system_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fin ; GFX12-NEXT: global_inv scope:SCOPE_SYS ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB16_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_max_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB16_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB16_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_max_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB16_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -2623,29 +2623,29 @@ define void @flat_system_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fi ; GFX12-NEXT: global_inv scope:SCOPE_SYS ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v4, v2, v2 -; GFX940-NEXT: .LBB17_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB17_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v4, v2, v2 +; GFX942-NEXT: .LBB17_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB17_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -2822,45 +2822,45 @@ define double @flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory(ptr ; GFX12-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: s_mov_b64 s[0:1], src_private_base -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5 -; GFX940-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX940-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB18_3 -; GFX940-NEXT: ; %bb.1: ; %Flow -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB18_4 -; GFX940-NEXT: .LBB18_2: ; %atomicrmw.phi -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] -; GFX940-NEXT: .LBB18_3: ; %atomicrmw.global -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_max_f64 v[0:1], v[4:5], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: ; implicit-def: $vgpr4_vgpr5 -; GFX940-NEXT: ; implicit-def: $vgpr2_vgpr3 -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execz .LBB18_2 -; GFX940-NEXT: .LBB18_4: ; %atomicrmw.private -; GFX940-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] -; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v6, -1, v4, vcc -; GFX940-NEXT: scratch_load_dwordx2 v[0:1], v6, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f64 v[4:5], v[0:1], v[0:1] -; GFX940-NEXT: v_max_f64 v[2:3], v[4:5], v[2:3] -; GFX940-NEXT: scratch_store_dwordx2 v6, v[2:3], off sc0 sc1 -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5 +; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX942-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB18_3 +; GFX942-NEXT: ; %bb.1: ; %Flow +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB18_4 +; GFX942-NEXT: .LBB18_2: ; %atomicrmw.phi +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] +; GFX942-NEXT: .LBB18_3: ; %atomicrmw.global +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_max_f64 v[0:1], v[4:5], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5 +; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execz .LBB18_2 +; GFX942-NEXT: .LBB18_4: ; %atomicrmw.private +; GFX942-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] +; GFX942-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v6, -1, v4, vcc +; GFX942-NEXT: scratch_load_dwordx2 v[0:1], v6, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f64 v[4:5], v[0:1], v[0:1] +; GFX942-NEXT: v_max_f64 v[2:3], v[4:5], v[2:3] +; GFX942-NEXT: scratch_store_dwordx2 v6, v[2:3], off +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -3220,45 +3220,45 @@ define double @flat_agent_atomic_fmax_ret_f64__offset12b_pos__amdgpu_no_fine_gra ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7f8 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], src_private_base -; GFX940-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5 -; GFX940-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX940-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB19_3 -; GFX940-NEXT: ; %bb.1: ; %Flow -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB19_4 -; GFX940-NEXT: .LBB19_2: ; %atomicrmw.phi -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] -; GFX940-NEXT: .LBB19_3: ; %atomicrmw.global -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_max_f64 v[0:1], v[4:5], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: ; implicit-def: $vgpr4_vgpr5 -; GFX940-NEXT: ; implicit-def: $vgpr2_vgpr3 -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execz .LBB19_2 -; GFX940-NEXT: .LBB19_4: ; %atomicrmw.private -; GFX940-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] -; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v6, -1, v4, vcc -; GFX940-NEXT: scratch_load_dwordx2 v[0:1], v6, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f64 v[4:5], v[0:1], v[0:1] -; GFX940-NEXT: v_max_f64 v[2:3], v[4:5], v[2:3] -; GFX940-NEXT: scratch_store_dwordx2 v6, v[2:3], off sc0 sc1 -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7f8 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base +; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5 +; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX942-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB19_3 +; GFX942-NEXT: ; %bb.1: ; %Flow +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB19_4 +; GFX942-NEXT: .LBB19_2: ; %atomicrmw.phi +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] +; GFX942-NEXT: .LBB19_3: ; %atomicrmw.global +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_max_f64 v[0:1], v[4:5], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5 +; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execz .LBB19_2 +; GFX942-NEXT: .LBB19_4: ; %atomicrmw.private +; GFX942-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] +; GFX942-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v6, -1, v4, vcc +; GFX942-NEXT: scratch_load_dwordx2 v[0:1], v6, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f64 v[4:5], v[0:1], v[0:1] +; GFX942-NEXT: v_max_f64 v[2:3], v[4:5], v[2:3] +; GFX942-NEXT: scratch_store_dwordx2 v6, v[2:3], off +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -3633,46 +3633,46 @@ define double @flat_agent_atomic_fmax_ret_f64__offset12b_neg__amdgpu_no_fine_gra ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_ret_f64__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], src_private_base -; GFX940-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5 -; GFX940-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX940-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB20_3 -; GFX940-NEXT: ; %bb.1: ; %Flow -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB20_4 -; GFX940-NEXT: .LBB20_2: ; %atomicrmw.phi -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] -; GFX940-NEXT: .LBB20_3: ; %atomicrmw.global -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_max_f64 v[0:1], v[4:5], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: ; implicit-def: $vgpr4_vgpr5 -; GFX940-NEXT: ; implicit-def: $vgpr2_vgpr3 -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execz .LBB20_2 -; GFX940-NEXT: .LBB20_4: ; %atomicrmw.private -; GFX940-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] -; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v6, -1, v4, vcc -; GFX940-NEXT: scratch_load_dwordx2 v[0:1], v6, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f64 v[4:5], v[0:1], v[0:1] -; GFX940-NEXT: v_max_f64 v[2:3], v[4:5], v[2:3] -; GFX940-NEXT: scratch_store_dwordx2 v6, v[2:3], off sc0 sc1 -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_ret_f64__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base +; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5 +; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX942-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB20_3 +; GFX942-NEXT: ; %bb.1: ; %Flow +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB20_4 +; GFX942-NEXT: .LBB20_2: ; %atomicrmw.phi +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] +; GFX942-NEXT: .LBB20_3: ; %atomicrmw.global +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_max_f64 v[0:1], v[4:5], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5 +; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execz .LBB20_2 +; GFX942-NEXT: .LBB20_4: ; %atomicrmw.private +; GFX942-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] +; GFX942-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v6, -1, v4, vcc +; GFX942-NEXT: scratch_load_dwordx2 v[0:1], v6, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f64 v[4:5], v[0:1], v[0:1] +; GFX942-NEXT: v_max_f64 v[2:3], v[4:5], v[2:3] +; GFX942-NEXT: scratch_store_dwordx2 v6, v[2:3], off +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_ret_f64__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -4041,42 +4041,42 @@ define void @flat_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory(ptr ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], src_private_base -; GFX940-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1 -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX940-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB21_3 -; GFX940-NEXT: ; %bb.1: ; %Flow -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB21_4 -; GFX940-NEXT: .LBB21_2: ; %atomicrmw.phi -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] -; GFX940-NEXT: .LBB21_3: ; %atomicrmw.global -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_max_f64 v[0:1], v[2:3] -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GFX940-NEXT: ; implicit-def: $vgpr2_vgpr3 -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execz .LBB21_2 -; GFX940-NEXT: .LBB21_4: ; %atomicrmw.private -; GFX940-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] -; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc -; GFX940-NEXT: scratch_load_dwordx2 v[0:1], v4, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] -; GFX940-NEXT: scratch_store_dwordx2 v4, v[0:1], off sc0 sc1 -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base +; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1 +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX942-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB21_3 +; GFX942-NEXT: ; %bb.1: ; %Flow +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB21_4 +; GFX942-NEXT: .LBB21_2: ; %atomicrmw.phi +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] +; GFX942-NEXT: .LBB21_3: ; %atomicrmw.global +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_max_f64 v[0:1], v[2:3] +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execz .LBB21_2 +; GFX942-NEXT: .LBB21_4: ; %atomicrmw.private +; GFX942-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; GFX942-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc +; GFX942-NEXT: scratch_load_dwordx2 v[0:1], v4, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX942-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX942-NEXT: scratch_store_dwordx2 v4, v[0:1], off +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -4431,44 +4431,44 @@ define void @flat_agent_atomic_fmax_noret_f64__offset12b_pos__amdgpu_no_fine_gra ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_noret_f64__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7f8 -; GFX940-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], src_private_base -; GFX940-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1 -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX940-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB22_3 -; GFX940-NEXT: ; %bb.1: ; %Flow -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB22_4 -; GFX940-NEXT: .LBB22_2: ; %atomicrmw.phi -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] -; GFX940-NEXT: .LBB22_3: ; %atomicrmw.global -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_max_f64 v[0:1], v[2:3] -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GFX940-NEXT: ; implicit-def: $vgpr2_vgpr3 -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execz .LBB22_2 -; GFX940-NEXT: .LBB22_4: ; %atomicrmw.private -; GFX940-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] -; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc -; GFX940-NEXT: scratch_load_dwordx2 v[0:1], v4, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] -; GFX940-NEXT: scratch_store_dwordx2 v4, v[0:1], off sc0 sc1 -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_noret_f64__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7f8 +; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base +; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1 +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX942-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB22_3 +; GFX942-NEXT: ; %bb.1: ; %Flow +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB22_4 +; GFX942-NEXT: .LBB22_2: ; %atomicrmw.phi +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] +; GFX942-NEXT: .LBB22_3: ; %atomicrmw.global +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_max_f64 v[0:1], v[2:3] +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execz .LBB22_2 +; GFX942-NEXT: .LBB22_4: ; %atomicrmw.private +; GFX942-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; GFX942-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc +; GFX942-NEXT: scratch_load_dwordx2 v[0:1], v4, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX942-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX942-NEXT: scratch_store_dwordx2 v4, v[0:1], off +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_noret_f64__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -4837,45 +4837,45 @@ define void @flat_agent_atomic_fmax_noret_f64__offset12b_neg__amdgpu_no_fine_gra ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_noret_f64__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], src_private_base -; GFX940-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1 -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX940-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB23_3 -; GFX940-NEXT: ; %bb.1: ; %Flow -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB23_4 -; GFX940-NEXT: .LBB23_2: ; %atomicrmw.phi -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] -; GFX940-NEXT: .LBB23_3: ; %atomicrmw.global -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_max_f64 v[0:1], v[2:3] -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GFX940-NEXT: ; implicit-def: $vgpr2_vgpr3 -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execz .LBB23_2 -; GFX940-NEXT: .LBB23_4: ; %atomicrmw.private -; GFX940-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] -; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc -; GFX940-NEXT: scratch_load_dwordx2 v[0:1], v4, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] -; GFX940-NEXT: scratch_store_dwordx2 v4, v[0:1], off sc0 sc1 -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_noret_f64__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base +; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1 +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX942-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB23_3 +; GFX942-NEXT: ; %bb.1: ; %Flow +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB23_4 +; GFX942-NEXT: .LBB23_2: ; %atomicrmw.phi +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] +; GFX942-NEXT: .LBB23_3: ; %atomicrmw.global +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_max_f64 v[0:1], v[2:3] +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execz .LBB23_2 +; GFX942-NEXT: .LBB23_4: ; %atomicrmw.private +; GFX942-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; GFX942-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc +; GFX942-NEXT: scratch_load_dwordx2 v[0:1], v4, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX942-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX942-NEXT: scratch_store_dwordx2 v4, v[0:1], off +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_noret_f64__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -5236,45 +5236,45 @@ define double @flat_agent_atomic_fmax_ret_f64__amdgpu_no_remote_memory(ptr %ptr, ; GFX12-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_ret_f64__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: s_mov_b64 s[0:1], src_private_base -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5 -; GFX940-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX940-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB24_3 -; GFX940-NEXT: ; %bb.1: ; %Flow -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB24_4 -; GFX940-NEXT: .LBB24_2: ; %atomicrmw.phi -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] -; GFX940-NEXT: .LBB24_3: ; %atomicrmw.global -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_max_f64 v[0:1], v[4:5], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: ; implicit-def: $vgpr4_vgpr5 -; GFX940-NEXT: ; implicit-def: $vgpr2_vgpr3 -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execz .LBB24_2 -; GFX940-NEXT: .LBB24_4: ; %atomicrmw.private -; GFX940-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] -; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v6, -1, v4, vcc -; GFX940-NEXT: scratch_load_dwordx2 v[0:1], v6, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f64 v[4:5], v[0:1], v[0:1] -; GFX940-NEXT: v_max_f64 v[2:3], v[4:5], v[2:3] -; GFX940-NEXT: scratch_store_dwordx2 v6, v[2:3], off sc0 sc1 -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_ret_f64__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5 +; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX942-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB24_3 +; GFX942-NEXT: ; %bb.1: ; %Flow +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB24_4 +; GFX942-NEXT: .LBB24_2: ; %atomicrmw.phi +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] +; GFX942-NEXT: .LBB24_3: ; %atomicrmw.global +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_max_f64 v[0:1], v[4:5], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5 +; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execz .LBB24_2 +; GFX942-NEXT: .LBB24_4: ; %atomicrmw.private +; GFX942-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] +; GFX942-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v6, -1, v4, vcc +; GFX942-NEXT: scratch_load_dwordx2 v[0:1], v6, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f64 v[4:5], v[0:1], v[0:1] +; GFX942-NEXT: v_max_f64 v[2:3], v[4:5], v[2:3] +; GFX942-NEXT: scratch_store_dwordx2 v6, v[2:3], off +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_ret_f64__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -5659,45 +5659,45 @@ define double @flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory__am ; GFX12-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: s_mov_b64 s[0:1], src_private_base -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5 -; GFX940-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX940-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB25_3 -; GFX940-NEXT: ; %bb.1: ; %Flow -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB25_4 -; GFX940-NEXT: .LBB25_2: ; %atomicrmw.phi -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] -; GFX940-NEXT: .LBB25_3: ; %atomicrmw.global -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_max_f64 v[0:1], v[4:5], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: ; implicit-def: $vgpr4_vgpr5 -; GFX940-NEXT: ; implicit-def: $vgpr2_vgpr3 -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execz .LBB25_2 -; GFX940-NEXT: .LBB25_4: ; %atomicrmw.private -; GFX940-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] -; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v6, -1, v4, vcc -; GFX940-NEXT: scratch_load_dwordx2 v[0:1], v6, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f64 v[4:5], v[0:1], v[0:1] -; GFX940-NEXT: v_max_f64 v[2:3], v[4:5], v[2:3] -; GFX940-NEXT: scratch_store_dwordx2 v6, v[2:3], off sc0 sc1 -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5 +; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX942-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB25_3 +; GFX942-NEXT: ; %bb.1: ; %Flow +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB25_4 +; GFX942-NEXT: .LBB25_2: ; %atomicrmw.phi +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] +; GFX942-NEXT: .LBB25_3: ; %atomicrmw.global +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_max_f64 v[0:1], v[4:5], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5 +; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execz .LBB25_2 +; GFX942-NEXT: .LBB25_4: ; %atomicrmw.private +; GFX942-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] +; GFX942-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v6, -1, v4, vcc +; GFX942-NEXT: scratch_load_dwordx2 v[0:1], v6, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f64 v[4:5], v[0:1], v[0:1] +; GFX942-NEXT: v_max_f64 v[2:3], v[4:5], v[2:3] +; GFX942-NEXT: scratch_store_dwordx2 v6, v[2:3], off +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -6039,40 +6039,40 @@ define half @flat_agent_atomic_fmax_ret_f16__amdgpu_no_fine_grained_memory(ptr % ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_ret_f16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v3 -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX940-NEXT: .LBB26_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_e32 v5, v3, v7 -; GFX940-NEXT: v_max_f16_e32 v5, v5, v5 -; GFX940-NEXT: v_max_f16_e32 v5, v5, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v5, v3, v5 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB26_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_ret_f16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v3 +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX942-NEXT: .LBB26_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_e32 v5, v3, v7 +; GFX942-NEXT: v_max_f16_e32 v5, v5, v5 +; GFX942-NEXT: v_max_f16_e32 v5, v5, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v5, v3, v5 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB26_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_ret_f16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -6343,42 +6343,42 @@ define half @flat_agent_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grain ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX940-NEXT: .LBB27_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_e32 v5, v3, v7 -; GFX940-NEXT: v_max_f16_e32 v5, v5, v5 -; GFX940-NEXT: v_max_f16_e32 v5, v5, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v5, v3, v5 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB27_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX942-NEXT: .LBB27_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_e32 v5, v3, v7 +; GFX942-NEXT: v_max_f16_e32 v5, v5, v5 +; GFX942-NEXT: v_max_f16_e32 v5, v5, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v5, v3, v5 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB27_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -6656,43 +6656,43 @@ define half @flat_agent_atomic_fmax_ret_f16__offset12b_neg__amdgpu_no_fine_grain ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX940-NEXT: .LBB28_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_e32 v5, v3, v7 -; GFX940-NEXT: v_max_f16_e32 v5, v5, v5 -; GFX940-NEXT: v_max_f16_e32 v5, v5, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v5, v3, v5 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB28_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX942-NEXT: .LBB28_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_e32 v5, v3, v7 +; GFX942-NEXT: v_max_f16_e32 v5, v5, v5 +; GFX942-NEXT: v_max_f16_e32 v5, v5, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v5, v3, v5 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB28_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -6967,39 +6967,39 @@ define void @flat_agent_atomic_fmax_noret_f16__amdgpu_no_fine_grained_memory(ptr ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_noret_f16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v3 -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v6, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX940-NEXT: .LBB29_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v3, v5 -; GFX940-NEXT: v_max_f16_e32 v4, v4, v4 -; GFX940-NEXT: v_max_f16_e32 v4, v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v3, v4 -; GFX940-NEXT: v_and_or_b32 v4, v5, v6, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB29_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_noret_f16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v3 +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v6, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX942-NEXT: .LBB29_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v3, v5 +; GFX942-NEXT: v_max_f16_e32 v4, v4, v4 +; GFX942-NEXT: v_max_f16_e32 v4, v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4 +; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB29_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_noret_f16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -7262,41 +7262,41 @@ define void @flat_agent_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_gra ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v4, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v6, v2, v2 -; GFX940-NEXT: .LBB30_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v2, v4, v3 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v6 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, v4, v2 -; GFX940-NEXT: v_and_or_b32 v2, v3, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB30_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v4, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v6, v2, v2 +; GFX942-NEXT: .LBB30_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v2, v4, v3 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v6 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, v4, v2 +; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB30_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -7566,42 +7566,42 @@ define void @flat_agent_atomic_fmax_noret_f16__offset12b_neg__amdgpu_no_fine_gra ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v4, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v6, v2, v2 -; GFX940-NEXT: .LBB31_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v2, v4, v3 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v6 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, v4, v2 -; GFX940-NEXT: v_and_or_b32 v2, v3, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB31_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v4, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v6, v2, v2 +; GFX942-NEXT: .LBB31_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v2, v4, v3 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v6 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, v4, v2 +; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB31_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -7859,32 +7859,32 @@ define half @flat_agent_atomic_fmax_ret_f16__offset12b_pos__align4__amdgpu_no_fi ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2046 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX940-NEXT: s_mov_b32 s2, 0xffff0000 -; GFX940-NEXT: .LBB32_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f16_e32 v3, v5, v5 -; GFX940-NEXT: v_max_f16_e32 v3, v3, v2 -; GFX940-NEXT: v_and_or_b32 v4, v5, s2, v3 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB32_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2046 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX942-NEXT: s_mov_b32 s2, 0xffff0000 +; GFX942-NEXT: .LBB32_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f16_e32 v3, v5, v5 +; GFX942-NEXT: v_max_f16_e32 v3, v3, v2 +; GFX942-NEXT: v_and_or_b32 v4, v5, s2, v3 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB32_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -8094,31 +8094,31 @@ define void @flat_agent_atomic_fmax_noret_f16__offset12b__align4_pos__amdgpu_no_ ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2046 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v4, v2, v2 -; GFX940-NEXT: s_mov_b32 s2, 0xffff0000 -; GFX940-NEXT: .LBB33_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f16_e32 v2, v3, v3 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v4 -; GFX940-NEXT: v_and_or_b32 v2, v3, s2, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2046 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB33_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2046 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v4, v2, v2 +; GFX942-NEXT: s_mov_b32 s2, 0xffff0000 +; GFX942-NEXT: .LBB33_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f16_e32 v2, v3, v3 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v4 +; GFX942-NEXT: v_and_or_b32 v2, v3, s2, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2046 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB33_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -8339,42 +8339,42 @@ define half @flat_system_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grai ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX940-NEXT: .LBB34_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_e32 v5, v3, v7 -; GFX940-NEXT: v_max_f16_e32 v5, v5, v5 -; GFX940-NEXT: v_max_f16_e32 v5, v5, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v5, v3, v5 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB34_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX942-NEXT: .LBB34_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_e32 v5, v3, v7 +; GFX942-NEXT: v_max_f16_e32 v5, v5, v5 +; GFX942-NEXT: v_max_f16_e32 v5, v5, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v5, v3, v5 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB34_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -8654,41 +8654,41 @@ define void @flat_system_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_gr ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v4, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v6, v2, v2 -; GFX940-NEXT: .LBB35_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v2, v4, v3 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v6 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, v4, v2 -; GFX940-NEXT: v_and_or_b32 v2, v3, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB35_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v4, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v6, v2, v2 +; GFX942-NEXT: .LBB35_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v2, v4, v3 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v6 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, v4, v2 +; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB35_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -8970,47 +8970,47 @@ define bfloat @flat_agent_atomic_fmax_ret_bf16__amdgpu_no_fine_grained_memory(pt ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_ret_bf16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v3 -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB36_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_max_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v5, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB36_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_ret_bf16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v3 +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB36_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_max_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v5, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB36_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_ret_bf16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -9316,49 +9316,49 @@ define bfloat @flat_agent_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_gr ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB37_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_max_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v5, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB37_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB37_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_max_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v5, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB37_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -9672,50 +9672,50 @@ define bfloat @flat_agent_atomic_fmax_ret_bf16__offset12b_neg__amdgpu_no_fine_gr ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB38_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_max_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v5, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB38_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB38_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_max_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v5, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB38_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -10024,46 +10024,46 @@ define void @flat_agent_atomic_fmax_noret_bf16__amdgpu_no_fine_grained_memory(pt ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_noret_bf16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v3 -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v6, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB39_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v4, v3, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_max_f32_e32 v4, v4, v2 -; GFX940-NEXT: v_bfe_u32 v7, v4, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v4 -; GFX940-NEXT: v_add3_u32 v7, v7, v4, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v4, v7, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v4, v5, v6, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB39_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_noret_bf16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v3 +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v6, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB39_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v4, v3, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_max_f32_e32 v4, v4, v2 +; GFX942-NEXT: v_bfe_u32 v7, v4, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v4 +; GFX942-NEXT: v_add3_u32 v7, v7, v4, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v4, v7, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB39_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_noret_bf16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -10359,48 +10359,48 @@ define void @flat_agent_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_gr ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v4, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v6, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB40_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v6 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v2, v3, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB40_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v4, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v6, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB40_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v6 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB40_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -10704,49 +10704,49 @@ define void @flat_agent_atomic_fmax_noret_bf16__offset12b_neg__amdgpu_no_fine_gr ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v4, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v6, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB41_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v6 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v2, v3, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB41_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v4, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v6, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB41_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v6 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB41_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -11039,40 +11039,40 @@ define bfloat @flat_agent_atomic_fmax_ret_bf16__offset12b_pos__align4__amdgpu_no ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2046 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: s_mov_b32 s3, 0xffff0000 -; GFX940-NEXT: .LBB42_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v5 -; GFX940-NEXT: v_max_f32_e32 v3, v3, v2 -; GFX940-NEXT: v_bfe_u32 v4, v3, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v3 -; GFX940-NEXT: v_add3_u32 v4, v4, v3, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v6, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3 -; GFX940-NEXT: v_and_or_b32 v4, v5, s3, v3 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB42_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2046 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: s_mov_b32 s3, 0xffff0000 +; GFX942-NEXT: .LBB42_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v5 +; GFX942-NEXT: v_max_f32_e32 v3, v3, v2 +; GFX942-NEXT: v_bfe_u32 v4, v3, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v3 +; GFX942-NEXT: v_add3_u32 v4, v4, v3, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v4, v6, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX942-NEXT: v_and_or_b32 v4, v5, s3, v3 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB42_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -11323,39 +11323,39 @@ define void @flat_agent_atomic_fmax_noret_bf16__offset12b__align4_pos__amdgpu_no ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2046 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: s_mov_b32 s3, 0xffff0000 -; GFX940-NEXT: .LBB43_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v3 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v4 -; GFX940-NEXT: v_bfe_u32 v5, v2, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v2 -; GFX940-NEXT: v_add3_u32 v5, v5, v2, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_and_or_b32 v2, v3, s3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2046 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB43_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2046 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: s_mov_b32 s3, 0xffff0000 +; GFX942-NEXT: .LBB43_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX942-NEXT: v_bfe_u32 v5, v2, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v2 +; GFX942-NEXT: v_add3_u32 v5, v5, v2, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_and_or_b32 v2, v3, s3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2046 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB43_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -11617,49 +11617,49 @@ define bfloat @flat_system_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_g ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB44_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_max_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v5, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB44_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB44_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_max_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v5, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB44_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -11974,48 +11974,48 @@ define void @flat_system_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_g ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v4, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v6, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB45_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v6 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v2, v3, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB45_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v4, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v6, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB45_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v6 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB45_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -12303,31 +12303,31 @@ define <2 x half> @flat_agent_atomic_fmax_ret_v2f16__amdgpu_no_fine_grained_memo ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_ret_v2f16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX940-NEXT: .LBB46_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_pk_max_f16 v3, v5, v5 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_max_f16 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB46_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_ret_v2f16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX942-NEXT: .LBB46_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_pk_max_f16 v3, v5, v5 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_max_f16 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB46_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_ret_v2f16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -12538,31 +12538,31 @@ define <2 x half> @flat_agent_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no_fi ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX940-NEXT: .LBB47_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_pk_max_f16 v3, v5, v5 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_max_f16 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB47_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX942-NEXT: .LBB47_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_pk_max_f16 v3, v5, v5 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_max_f16 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB47_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -12776,38 +12776,38 @@ define <2 x half> @flat_agent_atomic_fmax_ret_v2f16__offset12b_neg__amdgpu_no_fi ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_ret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v4 -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v5, vcc -; GFX940-NEXT: flat_load_dword v0, v[0:1] -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v1, v2, v2 -; GFX940-NEXT: .LBB48_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_pk_max_f16 v0, v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_max_f16 v2, v0, v1 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v0, v[4:5], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB48_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_ret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v4 +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v5, vcc +; GFX942-NEXT: flat_load_dword v0, v[0:1] +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v1, v2, v2 +; GFX942-NEXT: .LBB48_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_pk_max_f16 v0, v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_max_f16 v2, v0, v1 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v0, v[4:5], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB48_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_ret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -13031,30 +13031,30 @@ define void @flat_agent_atomic_fmax_noret_v2f16__amdgpu_no_fine_grained_memory(p ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_noret_v2f16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v4, v2, v2 -; GFX940-NEXT: .LBB49_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v2, v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_max_f16 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB49_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_noret_v2f16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v4, v2, v2 +; GFX942-NEXT: .LBB49_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v2, v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_max_f16 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB49_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_noret_v2f16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -13257,30 +13257,30 @@ define void @flat_agent_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fine_g ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v4, v2, v2 -; GFX940-NEXT: .LBB50_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v2, v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_max_f16 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB50_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v4, v2, v2 +; GFX942-NEXT: .LBB50_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v2, v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_max_f16 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB50_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -13490,36 +13490,36 @@ define void @flat_agent_atomic_fmax_noret_v2f16__offset12b_neg__amdgpu_no_fine_g ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_noret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_co_u32_e32 v4, vcc, 0xfffff800, v0 -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc -; GFX940-NEXT: flat_load_dword v3, v[4:5] -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v4, v2, v2 -; GFX940-NEXT: .LBB51_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v2, v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_max_f16 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB51_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_noret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_co_u32_e32 v4, vcc, 0xfffff800, v0 +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc +; GFX942-NEXT: flat_load_dword v3, v[4:5] +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v4, v2, v2 +; GFX942-NEXT: .LBB51_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v2, v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_max_f16 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB51_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_noret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -13743,31 +13743,31 @@ define <2 x half> @flat_system_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no_f ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX940-NEXT: .LBB52_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_pk_max_f16 v3, v5, v5 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_max_f16 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB52_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX942-NEXT: .LBB52_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_pk_max_f16 v3, v5, v5 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_max_f16 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB52_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -13983,30 +13983,30 @@ define void @flat_system_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fine_ ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v4, v2, v2 -; GFX940-NEXT: .LBB53_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v2, v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_max_f16 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB53_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v4, v2, v2 +; GFX942-NEXT: .LBB53_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v2, v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_max_f16 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB53_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -14242,47 +14242,47 @@ define <2 x bfloat> @flat_agent_atomic_fmax_ret_v2bf16__amdgpu_no_fine_grained_m ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_ret_v2bf16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB54_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v7 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 -; GFX940-NEXT: v_max_f32_e32 v3, v3, v4 -; GFX940-NEXT: v_max_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v3, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v3 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v3, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v5, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v6, v5, v3, s5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[6:7] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB54_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_ret_v2bf16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB54_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v7 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 +; GFX942-NEXT: v_max_f32_e32 v3, v3, v4 +; GFX942-NEXT: v_max_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v3, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v3 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v3, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v5, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v6, v5, v3, s5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[6:7] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB54_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_ret_v2bf16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -14588,47 +14588,47 @@ define <2 x bfloat> @flat_agent_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_no ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB55_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v7 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 -; GFX940-NEXT: v_max_f32_e32 v3, v3, v4 -; GFX940-NEXT: v_max_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v3, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v3 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v3, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v5, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v6, v5, v3, s5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[6:7] offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB55_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB55_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v7 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 +; GFX942-NEXT: v_max_f32_e32 v3, v3, v4 +; GFX942-NEXT: v_max_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v3, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v3 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v3, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v5, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v6, v5, v3, s5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[6:7] offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB55_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -14937,54 +14937,54 @@ define <2 x bfloat> @flat_agent_atomic_fmax_ret_v2bf16__offset12b_neg__amdgpu_no ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v4 -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v5, vcc -; GFX940-NEXT: flat_load_dword v0, v[0:1] -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB56_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v7 -; GFX940-NEXT: v_and_b32_e32 v3, 0xffff0000, v7 -; GFX940-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX940-NEXT: v_max_f32_e32 v3, v3, v2 -; GFX940-NEXT: v_bfe_u32 v6, v0, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v3, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v0 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v3 -; GFX940-NEXT: v_add3_u32 v6, v6, v0, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v3, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v0, v0 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v0, v6, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v6, v3, v0, s5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v0, v[4:5], v[6:7] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v0, v7 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB56_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v4 +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v5, vcc +; GFX942-NEXT: flat_load_dword v0, v[0:1] +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB56_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v7 +; GFX942-NEXT: v_and_b32_e32 v3, 0xffff0000, v7 +; GFX942-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX942-NEXT: v_max_f32_e32 v3, v3, v2 +; GFX942-NEXT: v_bfe_u32 v6, v0, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v3, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v0 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v3 +; GFX942-NEXT: v_add3_u32 v6, v6, v0, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v3, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v0, v0 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v0, v6, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v6, v3, v0, s5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v0, v[4:5], v[6:7] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v0, v7 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB56_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -15303,46 +15303,46 @@ define void @flat_agent_atomic_fmax_noret_v2bf16__amdgpu_no_fine_grained_memory( ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_noret_v2bf16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB57_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v3 -; GFX940-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v4 -; GFX940-NEXT: v_max_f32_e32 v6, v6, v5 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v6, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v6 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v6, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v2, v6, v2, s5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB57_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_noret_v2bf16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB57_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; GFX942-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX942-NEXT: v_max_f32_e32 v6, v6, v5 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v6, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v6 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v6, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB57_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_noret_v2bf16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -15639,46 +15639,46 @@ define void @flat_agent_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine_ ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB58_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v3 -; GFX940-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v4 -; GFX940-NEXT: v_max_f32_e32 v6, v6, v5 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v6, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v6 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v6, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v2, v6, v2, s5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB58_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB58_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; GFX942-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX942-NEXT: v_max_f32_e32 v6, v6, v5 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v6, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v6 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v6, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB58_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -15982,52 +15982,52 @@ define void @flat_agent_atomic_fmax_noret_v2bf16__offset12b_neg__amdgpu_no_fine_ ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmax_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_co_u32_e32 v4, vcc, 0xfffff800, v0 -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc -; GFX940-NEXT: flat_load_dword v3, v[4:5] -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB59_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v3 -; GFX940-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v4 -; GFX940-NEXT: v_max_f32_e32 v6, v6, v5 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v6, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v6 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v6, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v2, v6, v2, s5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB59_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmax_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_co_u32_e32 v4, vcc, 0xfffff800, v0 +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc +; GFX942-NEXT: flat_load_dword v3, v[4:5] +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB59_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; GFX942-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX942-NEXT: v_max_f32_e32 v6, v6, v5 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v6, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v6 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v6, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB59_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmax_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -16345,47 +16345,47 @@ define <2 x bfloat> @flat_system_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_n ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB60_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v7 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 -; GFX940-NEXT: v_max_f32_e32 v3, v3, v4 -; GFX940-NEXT: v_max_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v3, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v3 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v3, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v5, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v6, v5, v3, s5 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[6:7] offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB60_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB60_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v7 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 +; GFX942-NEXT: v_max_f32_e32 v3, v3, v4 +; GFX942-NEXT: v_max_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v3, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v3 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v3, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v5, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v6, v5, v3, s5 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[6:7] offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB60_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -16696,46 +16696,46 @@ define void @flat_system_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB61_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v3 -; GFX940-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v4 -; GFX940-NEXT: v_max_f32_e32 v6, v6, v5 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v6, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v6 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v6, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v2, v6, v2, s5 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB61_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB61_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; GFX942-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX942-NEXT: v_max_f32_e32 v6, v6, v5 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v6, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v6 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v6, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB61_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmin.ll b/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmin.ll index 065596776cf73..1fc9ed70e009c 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmin.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmin.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s -; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s @@ -26,30 +26,30 @@ define float @flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory(ptr ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB0_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_min_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB0_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB0_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_min_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB0_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -168,30 +168,30 @@ define float @flat_agent_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_grai ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB1_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_min_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB1_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB1_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_min_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB1_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -316,37 +316,37 @@ define float @flat_agent_atomic_fmin_ret_f32__offset12b_neg__amdgpu_no_fine_grai ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v4 -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v5, vcc -; GFX940-NEXT: flat_load_dword v0, v[0:1] -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v1, v2, v2 -; GFX940-NEXT: .LBB2_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_max_f32_e32 v0, v3, v3 -; GFX940-NEXT: v_min_f32_e32 v2, v0, v1 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v0, v[4:5], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB2_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v4 +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v5, vcc +; GFX942-NEXT: flat_load_dword v0, v[0:1] +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v1, v2, v2 +; GFX942-NEXT: .LBB2_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_max_f32_e32 v0, v3, v3 +; GFX942-NEXT: v_min_f32_e32 v2, v0, v1 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v0, v[4:5], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB2_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -479,29 +479,29 @@ define void @flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory(ptr ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v4, v2, v2 -; GFX940-NEXT: .LBB3_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX940-NEXT: v_min_f32_e32 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB3_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v4, v2, v2 +; GFX942-NEXT: .LBB3_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX942-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB3_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -619,29 +619,29 @@ define void @flat_agent_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_gra ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v4, v2, v2 -; GFX940-NEXT: .LBB4_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX940-NEXT: v_min_f32_e32 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB4_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v4, v2, v2 +; GFX942-NEXT: .LBB4_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX942-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB4_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -766,35 +766,35 @@ define void @flat_agent_atomic_fmin_noret_f32__offset12b_neg__amdgpu_no_fine_gra ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_co_u32_e32 v4, vcc, 0xfffff800, v0 -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc -; GFX940-NEXT: flat_load_dword v3, v[4:5] -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v4, v2, v2 -; GFX940-NEXT: .LBB5_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX940-NEXT: v_min_f32_e32 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB5_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_co_u32_e32 v4, vcc, 0xfffff800, v0 +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc +; GFX942-NEXT: flat_load_dword v3, v[4:5] +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v4, v2, v2 +; GFX942-NEXT: .LBB5_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX942-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB5_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -930,30 +930,30 @@ define float @flat_system_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_gra ; GFX12-NEXT: global_inv scope:SCOPE_SYS ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB6_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_min_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB6_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB6_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_min_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB6_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -1081,29 +1081,29 @@ define void @flat_system_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_gr ; GFX12-NEXT: global_inv scope:SCOPE_SYS ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v4, v2, v2 -; GFX940-NEXT: .LBB7_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX940-NEXT: v_min_f32_e32 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB7_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v4, v2, v2 +; GFX942-NEXT: .LBB7_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX942-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB7_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -1230,30 +1230,30 @@ define float @flat_agent_atomic_fmin_ret_f32__amdgpu_no_remote_memory(ptr %ptr, ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB8_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_min_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB8_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB8_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_min_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB8_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -1422,30 +1422,30 @@ define float @flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory__amd ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB9_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_min_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB9_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB9_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_min_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB9_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -1568,30 +1568,30 @@ define float @flat_agent_atomic_fmin_ret_f32__ftz__amdgpu_no_fine_grained_memory ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_ret_f32__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB10_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_min_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB10_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_ret_f32__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB10_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_min_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB10_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_ret_f32__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -1710,30 +1710,30 @@ define float @flat_agent_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_fine ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB11_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_min_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB11_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB11_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_min_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB11_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -1858,37 +1858,37 @@ define float @flat_agent_atomic_fmin_ret_f32__offset12b_neg__ftz__amdgpu_no_fine ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v4 -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v5, vcc -; GFX940-NEXT: flat_load_dword v0, v[0:1] -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v1, v2, v2 -; GFX940-NEXT: .LBB12_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_max_f32_e32 v0, v3, v3 -; GFX940-NEXT: v_min_f32_e32 v2, v0, v1 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v0, v[4:5], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB12_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v4 +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v5, vcc +; GFX942-NEXT: flat_load_dword v0, v[0:1] +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v1, v2, v2 +; GFX942-NEXT: .LBB12_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_max_f32_e32 v0, v3, v3 +; GFX942-NEXT: v_min_f32_e32 v2, v0, v1 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v0, v[4:5], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB12_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -2021,29 +2021,29 @@ define void @flat_agent_atomic_fmin_noret_f32__ftz__amdgpu_no_fine_grained_memor ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_noret_f32__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v4, v2, v2 -; GFX940-NEXT: .LBB13_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX940-NEXT: v_min_f32_e32 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB13_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_noret_f32__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v4, v2, v2 +; GFX942-NEXT: .LBB13_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX942-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB13_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_noret_f32__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -2161,29 +2161,29 @@ define void @flat_agent_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_fin ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v4, v2, v2 -; GFX940-NEXT: .LBB14_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX940-NEXT: v_min_f32_e32 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB14_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v4, v2, v2 +; GFX942-NEXT: .LBB14_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX942-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB14_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -2308,35 +2308,35 @@ define void @flat_agent_atomic_fmin_noret_f32__offset12b_neg__ftz__amdgpu_no_fin ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_co_u32_e32 v4, vcc, 0xfffff800, v0 -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc -; GFX940-NEXT: flat_load_dword v3, v[4:5] -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v4, v2, v2 -; GFX940-NEXT: .LBB15_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX940-NEXT: v_min_f32_e32 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB15_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_co_u32_e32 v4, vcc, 0xfffff800, v0 +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc +; GFX942-NEXT: flat_load_dword v3, v[4:5] +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v4, v2, v2 +; GFX942-NEXT: .LBB15_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX942-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB15_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -2472,30 +2472,30 @@ define float @flat_system_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_fin ; GFX12-NEXT: global_inv scope:SCOPE_SYS ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB16_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_min_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB16_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB16_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_min_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB16_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -2623,29 +2623,29 @@ define void @flat_system_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_fi ; GFX12-NEXT: global_inv scope:SCOPE_SYS ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v4, v2, v2 -; GFX940-NEXT: .LBB17_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX940-NEXT: v_min_f32_e32 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB17_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v4, v2, v2 +; GFX942-NEXT: .LBB17_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX942-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB17_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -2822,45 +2822,45 @@ define double @flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory(ptr ; GFX12-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: s_mov_b64 s[0:1], src_private_base -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5 -; GFX940-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX940-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB18_3 -; GFX940-NEXT: ; %bb.1: ; %Flow -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB18_4 -; GFX940-NEXT: .LBB18_2: ; %atomicrmw.phi -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] -; GFX940-NEXT: .LBB18_3: ; %atomicrmw.global -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_min_f64 v[0:1], v[4:5], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: ; implicit-def: $vgpr4_vgpr5 -; GFX940-NEXT: ; implicit-def: $vgpr2_vgpr3 -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execz .LBB18_2 -; GFX940-NEXT: .LBB18_4: ; %atomicrmw.private -; GFX940-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] -; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v6, -1, v4, vcc -; GFX940-NEXT: scratch_load_dwordx2 v[0:1], v6, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f64 v[4:5], v[0:1], v[0:1] -; GFX940-NEXT: v_min_f64 v[2:3], v[4:5], v[2:3] -; GFX940-NEXT: scratch_store_dwordx2 v6, v[2:3], off sc0 sc1 -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5 +; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX942-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB18_3 +; GFX942-NEXT: ; %bb.1: ; %Flow +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB18_4 +; GFX942-NEXT: .LBB18_2: ; %atomicrmw.phi +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] +; GFX942-NEXT: .LBB18_3: ; %atomicrmw.global +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_min_f64 v[0:1], v[4:5], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5 +; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execz .LBB18_2 +; GFX942-NEXT: .LBB18_4: ; %atomicrmw.private +; GFX942-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] +; GFX942-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v6, -1, v4, vcc +; GFX942-NEXT: scratch_load_dwordx2 v[0:1], v6, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f64 v[4:5], v[0:1], v[0:1] +; GFX942-NEXT: v_min_f64 v[2:3], v[4:5], v[2:3] +; GFX942-NEXT: scratch_store_dwordx2 v6, v[2:3], off +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -3220,45 +3220,45 @@ define double @flat_agent_atomic_fmin_ret_f64__offset12b_pos__amdgpu_no_fine_gra ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7f8 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], src_private_base -; GFX940-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5 -; GFX940-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX940-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB19_3 -; GFX940-NEXT: ; %bb.1: ; %Flow -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB19_4 -; GFX940-NEXT: .LBB19_2: ; %atomicrmw.phi -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] -; GFX940-NEXT: .LBB19_3: ; %atomicrmw.global -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_min_f64 v[0:1], v[4:5], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: ; implicit-def: $vgpr4_vgpr5 -; GFX940-NEXT: ; implicit-def: $vgpr2_vgpr3 -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execz .LBB19_2 -; GFX940-NEXT: .LBB19_4: ; %atomicrmw.private -; GFX940-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] -; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v6, -1, v4, vcc -; GFX940-NEXT: scratch_load_dwordx2 v[0:1], v6, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f64 v[4:5], v[0:1], v[0:1] -; GFX940-NEXT: v_min_f64 v[2:3], v[4:5], v[2:3] -; GFX940-NEXT: scratch_store_dwordx2 v6, v[2:3], off sc0 sc1 -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7f8 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base +; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5 +; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX942-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB19_3 +; GFX942-NEXT: ; %bb.1: ; %Flow +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB19_4 +; GFX942-NEXT: .LBB19_2: ; %atomicrmw.phi +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] +; GFX942-NEXT: .LBB19_3: ; %atomicrmw.global +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_min_f64 v[0:1], v[4:5], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5 +; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execz .LBB19_2 +; GFX942-NEXT: .LBB19_4: ; %atomicrmw.private +; GFX942-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] +; GFX942-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v6, -1, v4, vcc +; GFX942-NEXT: scratch_load_dwordx2 v[0:1], v6, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f64 v[4:5], v[0:1], v[0:1] +; GFX942-NEXT: v_min_f64 v[2:3], v[4:5], v[2:3] +; GFX942-NEXT: scratch_store_dwordx2 v6, v[2:3], off +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -3633,46 +3633,46 @@ define double @flat_agent_atomic_fmin_ret_f64__offset12b_neg__amdgpu_no_fine_gra ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_ret_f64__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], src_private_base -; GFX940-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5 -; GFX940-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX940-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB20_3 -; GFX940-NEXT: ; %bb.1: ; %Flow -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB20_4 -; GFX940-NEXT: .LBB20_2: ; %atomicrmw.phi -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] -; GFX940-NEXT: .LBB20_3: ; %atomicrmw.global -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_min_f64 v[0:1], v[4:5], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: ; implicit-def: $vgpr4_vgpr5 -; GFX940-NEXT: ; implicit-def: $vgpr2_vgpr3 -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execz .LBB20_2 -; GFX940-NEXT: .LBB20_4: ; %atomicrmw.private -; GFX940-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] -; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v6, -1, v4, vcc -; GFX940-NEXT: scratch_load_dwordx2 v[0:1], v6, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f64 v[4:5], v[0:1], v[0:1] -; GFX940-NEXT: v_min_f64 v[2:3], v[4:5], v[2:3] -; GFX940-NEXT: scratch_store_dwordx2 v6, v[2:3], off sc0 sc1 -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_ret_f64__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base +; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5 +; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX942-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB20_3 +; GFX942-NEXT: ; %bb.1: ; %Flow +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB20_4 +; GFX942-NEXT: .LBB20_2: ; %atomicrmw.phi +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] +; GFX942-NEXT: .LBB20_3: ; %atomicrmw.global +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_min_f64 v[0:1], v[4:5], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5 +; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execz .LBB20_2 +; GFX942-NEXT: .LBB20_4: ; %atomicrmw.private +; GFX942-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] +; GFX942-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v6, -1, v4, vcc +; GFX942-NEXT: scratch_load_dwordx2 v[0:1], v6, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f64 v[4:5], v[0:1], v[0:1] +; GFX942-NEXT: v_min_f64 v[2:3], v[4:5], v[2:3] +; GFX942-NEXT: scratch_store_dwordx2 v6, v[2:3], off +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_ret_f64__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -4041,42 +4041,42 @@ define void @flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory(ptr ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], src_private_base -; GFX940-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1 -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX940-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB21_3 -; GFX940-NEXT: ; %bb.1: ; %Flow -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB21_4 -; GFX940-NEXT: .LBB21_2: ; %atomicrmw.phi -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] -; GFX940-NEXT: .LBB21_3: ; %atomicrmw.global -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_min_f64 v[0:1], v[2:3] -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GFX940-NEXT: ; implicit-def: $vgpr2_vgpr3 -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execz .LBB21_2 -; GFX940-NEXT: .LBB21_4: ; %atomicrmw.private -; GFX940-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] -; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc -; GFX940-NEXT: scratch_load_dwordx2 v[0:1], v4, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX940-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] -; GFX940-NEXT: scratch_store_dwordx2 v4, v[0:1], off sc0 sc1 -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base +; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1 +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX942-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB21_3 +; GFX942-NEXT: ; %bb.1: ; %Flow +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB21_4 +; GFX942-NEXT: .LBB21_2: ; %atomicrmw.phi +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] +; GFX942-NEXT: .LBB21_3: ; %atomicrmw.global +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_min_f64 v[0:1], v[2:3] +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execz .LBB21_2 +; GFX942-NEXT: .LBB21_4: ; %atomicrmw.private +; GFX942-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; GFX942-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc +; GFX942-NEXT: scratch_load_dwordx2 v[0:1], v4, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX942-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX942-NEXT: scratch_store_dwordx2 v4, v[0:1], off +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -4431,44 +4431,44 @@ define void @flat_agent_atomic_fmin_noret_f64__offset12b_pos__amdgpu_no_fine_gra ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_noret_f64__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7f8 -; GFX940-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], src_private_base -; GFX940-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1 -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX940-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB22_3 -; GFX940-NEXT: ; %bb.1: ; %Flow -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB22_4 -; GFX940-NEXT: .LBB22_2: ; %atomicrmw.phi -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] -; GFX940-NEXT: .LBB22_3: ; %atomicrmw.global -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_min_f64 v[0:1], v[2:3] -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GFX940-NEXT: ; implicit-def: $vgpr2_vgpr3 -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execz .LBB22_2 -; GFX940-NEXT: .LBB22_4: ; %atomicrmw.private -; GFX940-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] -; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc -; GFX940-NEXT: scratch_load_dwordx2 v[0:1], v4, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX940-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] -; GFX940-NEXT: scratch_store_dwordx2 v4, v[0:1], off sc0 sc1 -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_noret_f64__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7f8 +; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base +; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1 +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX942-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB22_3 +; GFX942-NEXT: ; %bb.1: ; %Flow +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB22_4 +; GFX942-NEXT: .LBB22_2: ; %atomicrmw.phi +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] +; GFX942-NEXT: .LBB22_3: ; %atomicrmw.global +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_min_f64 v[0:1], v[2:3] +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execz .LBB22_2 +; GFX942-NEXT: .LBB22_4: ; %atomicrmw.private +; GFX942-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; GFX942-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc +; GFX942-NEXT: scratch_load_dwordx2 v[0:1], v4, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX942-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX942-NEXT: scratch_store_dwordx2 v4, v[0:1], off +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_noret_f64__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -4837,45 +4837,45 @@ define void @flat_agent_atomic_fmin_noret_f64__offset12b_neg__amdgpu_no_fine_gra ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_noret_f64__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], src_private_base -; GFX940-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1 -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX940-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB23_3 -; GFX940-NEXT: ; %bb.1: ; %Flow -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB23_4 -; GFX940-NEXT: .LBB23_2: ; %atomicrmw.phi -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] -; GFX940-NEXT: .LBB23_3: ; %atomicrmw.global -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_min_f64 v[0:1], v[2:3] -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GFX940-NEXT: ; implicit-def: $vgpr2_vgpr3 -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execz .LBB23_2 -; GFX940-NEXT: .LBB23_4: ; %atomicrmw.private -; GFX940-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] -; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc -; GFX940-NEXT: scratch_load_dwordx2 v[0:1], v4, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX940-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] -; GFX940-NEXT: scratch_store_dwordx2 v4, v[0:1], off sc0 sc1 -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_noret_f64__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base +; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1 +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX942-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB23_3 +; GFX942-NEXT: ; %bb.1: ; %Flow +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB23_4 +; GFX942-NEXT: .LBB23_2: ; %atomicrmw.phi +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] +; GFX942-NEXT: .LBB23_3: ; %atomicrmw.global +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_min_f64 v[0:1], v[2:3] +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execz .LBB23_2 +; GFX942-NEXT: .LBB23_4: ; %atomicrmw.private +; GFX942-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; GFX942-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc +; GFX942-NEXT: scratch_load_dwordx2 v[0:1], v4, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX942-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX942-NEXT: scratch_store_dwordx2 v4, v[0:1], off +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_noret_f64__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -5236,45 +5236,45 @@ define double @flat_agent_atomic_fmin_ret_f64__amdgpu_no_remote_memory(ptr %ptr, ; GFX12-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: s_mov_b64 s[0:1], src_private_base -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5 -; GFX940-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX940-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB24_3 -; GFX940-NEXT: ; %bb.1: ; %Flow -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB24_4 -; GFX940-NEXT: .LBB24_2: ; %atomicrmw.phi -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] -; GFX940-NEXT: .LBB24_3: ; %atomicrmw.global -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_min_f64 v[0:1], v[4:5], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: ; implicit-def: $vgpr4_vgpr5 -; GFX940-NEXT: ; implicit-def: $vgpr2_vgpr3 -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execz .LBB24_2 -; GFX940-NEXT: .LBB24_4: ; %atomicrmw.private -; GFX940-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] -; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v6, -1, v4, vcc -; GFX940-NEXT: scratch_load_dwordx2 v[0:1], v6, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f64 v[4:5], v[0:1], v[0:1] -; GFX940-NEXT: v_min_f64 v[2:3], v[4:5], v[2:3] -; GFX940-NEXT: scratch_store_dwordx2 v6, v[2:3], off sc0 sc1 -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5 +; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX942-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB24_3 +; GFX942-NEXT: ; %bb.1: ; %Flow +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB24_4 +; GFX942-NEXT: .LBB24_2: ; %atomicrmw.phi +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] +; GFX942-NEXT: .LBB24_3: ; %atomicrmw.global +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_min_f64 v[0:1], v[4:5], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5 +; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execz .LBB24_2 +; GFX942-NEXT: .LBB24_4: ; %atomicrmw.private +; GFX942-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] +; GFX942-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v6, -1, v4, vcc +; GFX942-NEXT: scratch_load_dwordx2 v[0:1], v6, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f64 v[4:5], v[0:1], v[0:1] +; GFX942-NEXT: v_min_f64 v[2:3], v[4:5], v[2:3] +; GFX942-NEXT: scratch_store_dwordx2 v6, v[2:3], off +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -5659,45 +5659,45 @@ define double @flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory__am ; GFX12-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: s_mov_b64 s[0:1], src_private_base -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5 -; GFX940-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX940-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB25_3 -; GFX940-NEXT: ; %bb.1: ; %Flow -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB25_4 -; GFX940-NEXT: .LBB25_2: ; %atomicrmw.phi -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] -; GFX940-NEXT: .LBB25_3: ; %atomicrmw.global -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_min_f64 v[0:1], v[4:5], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: ; implicit-def: $vgpr4_vgpr5 -; GFX940-NEXT: ; implicit-def: $vgpr2_vgpr3 -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execz .LBB25_2 -; GFX940-NEXT: .LBB25_4: ; %atomicrmw.private -; GFX940-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] -; GFX940-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v6, -1, v4, vcc -; GFX940-NEXT: scratch_load_dwordx2 v[0:1], v6, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f64 v[4:5], v[0:1], v[0:1] -; GFX940-NEXT: v_min_f64 v[2:3], v[4:5], v[2:3] -; GFX940-NEXT: scratch_store_dwordx2 v6, v[2:3], off sc0 sc1 -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5 +; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX942-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB25_3 +; GFX942-NEXT: ; %bb.1: ; %Flow +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB25_4 +; GFX942-NEXT: .LBB25_2: ; %atomicrmw.phi +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] +; GFX942-NEXT: .LBB25_3: ; %atomicrmw.global +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_min_f64 v[0:1], v[4:5], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5 +; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execz .LBB25_2 +; GFX942-NEXT: .LBB25_4: ; %atomicrmw.private +; GFX942-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] +; GFX942-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v6, -1, v4, vcc +; GFX942-NEXT: scratch_load_dwordx2 v[0:1], v6, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f64 v[4:5], v[0:1], v[0:1] +; GFX942-NEXT: v_min_f64 v[2:3], v[4:5], v[2:3] +; GFX942-NEXT: scratch_store_dwordx2 v6, v[2:3], off +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -6039,40 +6039,40 @@ define half @flat_agent_atomic_fmin_ret_f16__amdgpu_no_fine_grained_memory(ptr % ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_ret_f16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v3 -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX940-NEXT: .LBB26_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_e32 v5, v3, v7 -; GFX940-NEXT: v_max_f16_e32 v5, v5, v5 -; GFX940-NEXT: v_min_f16_e32 v5, v5, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v5, v3, v5 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB26_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_ret_f16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v3 +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX942-NEXT: .LBB26_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_e32 v5, v3, v7 +; GFX942-NEXT: v_max_f16_e32 v5, v5, v5 +; GFX942-NEXT: v_min_f16_e32 v5, v5, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v5, v3, v5 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB26_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_ret_f16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -6343,42 +6343,42 @@ define half @flat_agent_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grain ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX940-NEXT: .LBB27_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_e32 v5, v3, v7 -; GFX940-NEXT: v_max_f16_e32 v5, v5, v5 -; GFX940-NEXT: v_min_f16_e32 v5, v5, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v5, v3, v5 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB27_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX942-NEXT: .LBB27_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_e32 v5, v3, v7 +; GFX942-NEXT: v_max_f16_e32 v5, v5, v5 +; GFX942-NEXT: v_min_f16_e32 v5, v5, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v5, v3, v5 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB27_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -6656,43 +6656,43 @@ define half @flat_agent_atomic_fmin_ret_f16__offset12b_neg__amdgpu_no_fine_grain ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX940-NEXT: .LBB28_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_e32 v5, v3, v7 -; GFX940-NEXT: v_max_f16_e32 v5, v5, v5 -; GFX940-NEXT: v_min_f16_e32 v5, v5, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v5, v3, v5 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB28_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX942-NEXT: .LBB28_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_e32 v5, v3, v7 +; GFX942-NEXT: v_max_f16_e32 v5, v5, v5 +; GFX942-NEXT: v_min_f16_e32 v5, v5, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v5, v3, v5 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB28_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -6967,39 +6967,39 @@ define void @flat_agent_atomic_fmin_noret_f16__amdgpu_no_fine_grained_memory(ptr ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_noret_f16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v3 -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v6, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX940-NEXT: .LBB29_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v3, v5 -; GFX940-NEXT: v_max_f16_e32 v4, v4, v4 -; GFX940-NEXT: v_min_f16_e32 v4, v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v3, v4 -; GFX940-NEXT: v_and_or_b32 v4, v5, v6, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB29_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_noret_f16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v3 +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v6, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX942-NEXT: .LBB29_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v3, v5 +; GFX942-NEXT: v_max_f16_e32 v4, v4, v4 +; GFX942-NEXT: v_min_f16_e32 v4, v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4 +; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB29_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_noret_f16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -7262,41 +7262,41 @@ define void @flat_agent_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_gra ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v4, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v6, v2, v2 -; GFX940-NEXT: .LBB30_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v2, v4, v3 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX940-NEXT: v_min_f16_e32 v2, v2, v6 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, v4, v2 -; GFX940-NEXT: v_and_or_b32 v2, v3, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB30_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v4, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v6, v2, v2 +; GFX942-NEXT: .LBB30_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v2, v4, v3 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX942-NEXT: v_min_f16_e32 v2, v2, v6 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, v4, v2 +; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB30_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -7566,42 +7566,42 @@ define void @flat_agent_atomic_fmin_noret_f16__offset12b_neg__amdgpu_no_fine_gra ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v4, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v6, v2, v2 -; GFX940-NEXT: .LBB31_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v2, v4, v3 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX940-NEXT: v_min_f16_e32 v2, v2, v6 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, v4, v2 -; GFX940-NEXT: v_and_or_b32 v2, v3, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB31_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v4, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v6, v2, v2 +; GFX942-NEXT: .LBB31_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v2, v4, v3 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX942-NEXT: v_min_f16_e32 v2, v2, v6 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, v4, v2 +; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB31_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -7859,32 +7859,32 @@ define half @flat_agent_atomic_fmin_ret_f16__offset12b_pos__align4__amdgpu_no_fi ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2046 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX940-NEXT: s_mov_b32 s2, 0xffff0000 -; GFX940-NEXT: .LBB32_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f16_e32 v3, v5, v5 -; GFX940-NEXT: v_min_f16_e32 v3, v3, v2 -; GFX940-NEXT: v_and_or_b32 v4, v5, s2, v3 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB32_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2046 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX942-NEXT: s_mov_b32 s2, 0xffff0000 +; GFX942-NEXT: .LBB32_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f16_e32 v3, v5, v5 +; GFX942-NEXT: v_min_f16_e32 v3, v3, v2 +; GFX942-NEXT: v_and_or_b32 v4, v5, s2, v3 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB32_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -8094,31 +8094,31 @@ define void @flat_agent_atomic_fmin_noret_f16__offset12b__align4_pos__amdgpu_no_ ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2046 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v4, v2, v2 -; GFX940-NEXT: s_mov_b32 s2, 0xffff0000 -; GFX940-NEXT: .LBB33_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f16_e32 v2, v3, v3 -; GFX940-NEXT: v_min_f16_e32 v2, v2, v4 -; GFX940-NEXT: v_and_or_b32 v2, v3, s2, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2046 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB33_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2046 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v4, v2, v2 +; GFX942-NEXT: s_mov_b32 s2, 0xffff0000 +; GFX942-NEXT: .LBB33_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f16_e32 v2, v3, v3 +; GFX942-NEXT: v_min_f16_e32 v2, v2, v4 +; GFX942-NEXT: v_and_or_b32 v2, v3, s2, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2046 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB33_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -8339,42 +8339,42 @@ define half @flat_system_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grai ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX940-NEXT: .LBB34_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_e32 v5, v3, v7 -; GFX940-NEXT: v_max_f16_e32 v5, v5, v5 -; GFX940-NEXT: v_min_f16_e32 v5, v5, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v5, v3, v5 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB34_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX942-NEXT: .LBB34_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_e32 v5, v3, v7 +; GFX942-NEXT: v_max_f16_e32 v5, v5, v5 +; GFX942-NEXT: v_min_f16_e32 v5, v5, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v5, v3, v5 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB34_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -8654,41 +8654,41 @@ define void @flat_system_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_gr ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v4, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v6, v2, v2 -; GFX940-NEXT: .LBB35_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v2, v4, v3 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX940-NEXT: v_min_f16_e32 v2, v2, v6 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, v4, v2 -; GFX940-NEXT: v_and_or_b32 v2, v3, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB35_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v4, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v6, v2, v2 +; GFX942-NEXT: .LBB35_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v2, v4, v3 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX942-NEXT: v_min_f16_e32 v2, v2, v6 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, v4, v2 +; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB35_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -8970,47 +8970,47 @@ define bfloat @flat_agent_atomic_fmin_ret_bf16__amdgpu_no_fine_grained_memory(pt ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_ret_bf16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v3 -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB36_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_min_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v5, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB36_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_ret_bf16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v3 +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB36_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_min_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v5, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB36_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_ret_bf16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -9316,49 +9316,49 @@ define bfloat @flat_agent_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_gr ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB37_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_min_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v5, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB37_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB37_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_min_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v5, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB37_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -9672,50 +9672,50 @@ define bfloat @flat_agent_atomic_fmin_ret_bf16__offset12b_neg__amdgpu_no_fine_gr ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB38_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_min_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v5, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB38_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB38_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_min_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v5, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB38_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -10024,46 +10024,46 @@ define void @flat_agent_atomic_fmin_noret_bf16__amdgpu_no_fine_grained_memory(pt ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_noret_bf16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v3 -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v6, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB39_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v4, v3, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_min_f32_e32 v4, v4, v2 -; GFX940-NEXT: v_bfe_u32 v7, v4, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v4 -; GFX940-NEXT: v_add3_u32 v7, v7, v4, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v4, v7, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v4, v5, v6, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB39_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_noret_bf16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v3 +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v6, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB39_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v4, v3, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_min_f32_e32 v4, v4, v2 +; GFX942-NEXT: v_bfe_u32 v7, v4, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v4 +; GFX942-NEXT: v_add3_u32 v7, v7, v4, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v4, v7, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB39_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_noret_bf16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -10359,48 +10359,48 @@ define void @flat_agent_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_gr ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v4, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v6, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB40_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_min_f32_e32 v2, v2, v6 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v2, v3, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB40_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v4, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v6, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB40_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_min_f32_e32 v2, v2, v6 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB40_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -10704,49 +10704,49 @@ define void @flat_agent_atomic_fmin_noret_bf16__offset12b_neg__amdgpu_no_fine_gr ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v4, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v6, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB41_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_min_f32_e32 v2, v2, v6 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v2, v3, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB41_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v4, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v6, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB41_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_min_f32_e32 v2, v2, v6 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB41_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -11039,40 +11039,40 @@ define bfloat @flat_agent_atomic_fmin_ret_bf16__offset12b_pos__align4__amdgpu_no ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2046 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: s_mov_b32 s3, 0xffff0000 -; GFX940-NEXT: .LBB42_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v5 -; GFX940-NEXT: v_min_f32_e32 v3, v3, v2 -; GFX940-NEXT: v_bfe_u32 v4, v3, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v3 -; GFX940-NEXT: v_add3_u32 v4, v4, v3, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v6, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3 -; GFX940-NEXT: v_and_or_b32 v4, v5, s3, v3 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB42_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2046 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: s_mov_b32 s3, 0xffff0000 +; GFX942-NEXT: .LBB42_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v5 +; GFX942-NEXT: v_min_f32_e32 v3, v3, v2 +; GFX942-NEXT: v_bfe_u32 v4, v3, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v3 +; GFX942-NEXT: v_add3_u32 v4, v4, v3, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v4, v6, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX942-NEXT: v_and_or_b32 v4, v5, s3, v3 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB42_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -11323,39 +11323,39 @@ define void @flat_agent_atomic_fmin_noret_bf16__offset12b__align4_pos__amdgpu_no ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2046 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: s_mov_b32 s3, 0xffff0000 -; GFX940-NEXT: .LBB43_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v3 -; GFX940-NEXT: v_min_f32_e32 v2, v2, v4 -; GFX940-NEXT: v_bfe_u32 v5, v2, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v2 -; GFX940-NEXT: v_add3_u32 v5, v5, v2, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_and_or_b32 v2, v3, s3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2046 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB43_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2046 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: s_mov_b32 s3, 0xffff0000 +; GFX942-NEXT: .LBB43_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; GFX942-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX942-NEXT: v_bfe_u32 v5, v2, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v2 +; GFX942-NEXT: v_add3_u32 v5, v5, v2, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_and_or_b32 v2, v3, s3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2046 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB43_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -11617,49 +11617,49 @@ define bfloat @flat_system_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_g ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB44_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_min_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v5, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB44_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB44_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_min_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v5, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB44_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -11974,48 +11974,48 @@ define void @flat_system_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_g ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v4, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v6, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB45_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_min_f32_e32 v2, v2, v6 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v2, v3, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB45_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v4, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v6, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB45_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_min_f32_e32 v2, v2, v6 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB45_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -12303,31 +12303,31 @@ define <2 x half> @flat_agent_atomic_fmin_ret_v2f16__amdgpu_no_fine_grained_memo ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_ret_v2f16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX940-NEXT: .LBB46_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_pk_max_f16 v3, v5, v5 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_min_f16 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB46_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_ret_v2f16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX942-NEXT: .LBB46_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_pk_max_f16 v3, v5, v5 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_min_f16 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB46_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_ret_v2f16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -12538,31 +12538,31 @@ define <2 x half> @flat_agent_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no_fi ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX940-NEXT: .LBB47_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_pk_max_f16 v3, v5, v5 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_min_f16 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB47_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX942-NEXT: .LBB47_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_pk_max_f16 v3, v5, v5 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_min_f16 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB47_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -12776,38 +12776,38 @@ define <2 x half> @flat_agent_atomic_fmin_ret_v2f16__offset12b_neg__amdgpu_no_fi ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_ret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v4 -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v5, vcc -; GFX940-NEXT: flat_load_dword v0, v[0:1] -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v1, v2, v2 -; GFX940-NEXT: .LBB48_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_pk_max_f16 v0, v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_min_f16 v2, v0, v1 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v0, v[4:5], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB48_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_ret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v4 +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v5, vcc +; GFX942-NEXT: flat_load_dword v0, v[0:1] +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v1, v2, v2 +; GFX942-NEXT: .LBB48_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_pk_max_f16 v0, v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_min_f16 v2, v0, v1 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v0, v[4:5], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v0, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB48_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_ret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -13031,30 +13031,30 @@ define void @flat_agent_atomic_fmin_noret_v2f16__amdgpu_no_fine_grained_memory(p ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_noret_v2f16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v4, v2, v2 -; GFX940-NEXT: .LBB49_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v2, v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_min_f16 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB49_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_noret_v2f16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v4, v2, v2 +; GFX942-NEXT: .LBB49_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v2, v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_min_f16 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB49_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_noret_v2f16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -13257,30 +13257,30 @@ define void @flat_agent_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fine_g ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v4, v2, v2 -; GFX940-NEXT: .LBB50_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v2, v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_min_f16 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB50_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v4, v2, v2 +; GFX942-NEXT: .LBB50_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v2, v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_min_f16 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB50_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -13490,36 +13490,36 @@ define void @flat_agent_atomic_fmin_noret_v2f16__offset12b_neg__amdgpu_no_fine_g ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_noret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_co_u32_e32 v4, vcc, 0xfffff800, v0 -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc -; GFX940-NEXT: flat_load_dword v3, v[4:5] -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v4, v2, v2 -; GFX940-NEXT: .LBB51_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v2, v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_min_f16 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB51_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_noret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_co_u32_e32 v4, vcc, 0xfffff800, v0 +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc +; GFX942-NEXT: flat_load_dword v3, v[4:5] +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v4, v2, v2 +; GFX942-NEXT: .LBB51_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v2, v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_min_f16 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB51_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_noret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -13743,31 +13743,31 @@ define <2 x half> @flat_system_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no_f ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX940-NEXT: .LBB52_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_pk_max_f16 v3, v5, v5 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_min_f16 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB52_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX942-NEXT: .LBB52_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_pk_max_f16 v3, v5, v5 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_min_f16 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB52_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -13983,30 +13983,30 @@ define void @flat_system_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fine_ ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v4, v2, v2 -; GFX940-NEXT: .LBB53_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v2, v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_min_f16 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB53_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v4, v2, v2 +; GFX942-NEXT: .LBB53_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v2, v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_min_f16 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB53_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -14242,47 +14242,47 @@ define <2 x bfloat> @flat_agent_atomic_fmin_ret_v2bf16__amdgpu_no_fine_grained_m ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_ret_v2bf16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB54_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v7 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 -; GFX940-NEXT: v_min_f32_e32 v3, v3, v4 -; GFX940-NEXT: v_min_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v3, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v3 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v3, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v5, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v6, v5, v3, s5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[6:7] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB54_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_ret_v2bf16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB54_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v7 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 +; GFX942-NEXT: v_min_f32_e32 v3, v3, v4 +; GFX942-NEXT: v_min_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v3, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v3 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v3, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v5, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v6, v5, v3, s5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[6:7] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB54_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_ret_v2bf16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -14588,47 +14588,47 @@ define <2 x bfloat> @flat_agent_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_no ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB55_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v7 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 -; GFX940-NEXT: v_min_f32_e32 v3, v3, v4 -; GFX940-NEXT: v_min_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v3, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v3 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v3, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v5, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v6, v5, v3, s5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[6:7] offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB55_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB55_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v7 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 +; GFX942-NEXT: v_min_f32_e32 v3, v3, v4 +; GFX942-NEXT: v_min_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v3, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v3 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v3, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v5, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v6, v5, v3, s5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[6:7] offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB55_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -14937,54 +14937,54 @@ define <2 x bfloat> @flat_agent_atomic_fmin_ret_v2bf16__offset12b_neg__amdgpu_no ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v4 -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v5, vcc -; GFX940-NEXT: flat_load_dword v0, v[0:1] -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB56_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v7 -; GFX940-NEXT: v_and_b32_e32 v3, 0xffff0000, v7 -; GFX940-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX940-NEXT: v_min_f32_e32 v3, v3, v2 -; GFX940-NEXT: v_bfe_u32 v6, v0, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v3, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v0 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v3 -; GFX940-NEXT: v_add3_u32 v6, v6, v0, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v3, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v0, v0 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v0, v6, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v6, v3, v0, s5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v0, v[4:5], v[6:7] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v0, v7 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB56_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v4 +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v5, vcc +; GFX942-NEXT: flat_load_dword v0, v[0:1] +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB56_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v7 +; GFX942-NEXT: v_and_b32_e32 v3, 0xffff0000, v7 +; GFX942-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX942-NEXT: v_min_f32_e32 v3, v3, v2 +; GFX942-NEXT: v_bfe_u32 v6, v0, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v3, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v0 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v3 +; GFX942-NEXT: v_add3_u32 v6, v6, v0, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v3, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v0, v0 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v0, v6, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v6, v3, v0, s5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v0, v[4:5], v[6:7] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v0, v7 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB56_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -15303,46 +15303,46 @@ define void @flat_agent_atomic_fmin_noret_v2bf16__amdgpu_no_fine_grained_memory( ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_noret_v2bf16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB57_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v3 -; GFX940-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 -; GFX940-NEXT: v_min_f32_e32 v2, v2, v4 -; GFX940-NEXT: v_min_f32_e32 v6, v6, v5 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v6, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v6 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v6, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v2, v6, v2, s5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB57_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_noret_v2bf16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB57_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; GFX942-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 +; GFX942-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX942-NEXT: v_min_f32_e32 v6, v6, v5 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v6, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v6 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v6, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB57_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_noret_v2bf16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -15639,46 +15639,46 @@ define void @flat_agent_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine_ ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB58_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v3 -; GFX940-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 -; GFX940-NEXT: v_min_f32_e32 v2, v2, v4 -; GFX940-NEXT: v_min_f32_e32 v6, v6, v5 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v6, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v6 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v6, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v2, v6, v2, s5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB58_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB58_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; GFX942-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 +; GFX942-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX942-NEXT: v_min_f32_e32 v6, v6, v5 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v6, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v6 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v6, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB58_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -15982,52 +15982,52 @@ define void @flat_agent_atomic_fmin_noret_v2bf16__offset12b_neg__amdgpu_no_fine_ ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fmin_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_co_u32_e32 v4, vcc, 0xfffff800, v0 -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc -; GFX940-NEXT: flat_load_dword v3, v[4:5] -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB59_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v3 -; GFX940-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 -; GFX940-NEXT: v_min_f32_e32 v2, v2, v4 -; GFX940-NEXT: v_min_f32_e32 v6, v6, v5 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v6, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v6 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v6, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v2, v6, v2, s5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB59_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fmin_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_co_u32_e32 v4, vcc, 0xfffff800, v0 +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc +; GFX942-NEXT: flat_load_dword v3, v[4:5] +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB59_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; GFX942-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 +; GFX942-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX942-NEXT: v_min_f32_e32 v6, v6, v5 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v6, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v6 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v6, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB59_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fmin_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -16345,47 +16345,47 @@ define <2 x bfloat> @flat_system_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_n ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB60_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v7 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 -; GFX940-NEXT: v_min_f32_e32 v3, v3, v4 -; GFX940-NEXT: v_min_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v3, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v3 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v3, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v5, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v6, v5, v3, s5 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[6:7] offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB60_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB60_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v7 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 +; GFX942-NEXT: v_min_f32_e32 v3, v3, v4 +; GFX942-NEXT: v_min_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v3, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v3 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v3, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v5, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v6, v5, v3, s5 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[6:7] offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB60_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -16696,46 +16696,46 @@ define void @flat_system_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB61_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v3 -; GFX940-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 -; GFX940-NEXT: v_min_f32_e32 v2, v2, v4 -; GFX940-NEXT: v_min_f32_e32 v6, v6, v5 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v6, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v6 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v6, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v2, v6, v2, s5 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB61_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB61_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; GFX942-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 +; GFX942-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX942-NEXT: v_min_f32_e32 v6, v6, v5 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v6, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v6 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v6, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB61_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fsub.ll b/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fsub.ll index cd1a161346667..8d2963ce7db35 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fsub.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fsub.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s -; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s @@ -43,28 +43,28 @@ define float @flat_agent_atomic_fsub_ret_f32(ptr %ptr, float %val) #0 { ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_ret_f32: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB0_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_sub_f32_e32 v4, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB0_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_ret_f32: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB0_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB0_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_ret_f32: ; GFX11: ; %bb.0: @@ -238,28 +238,28 @@ define float @flat_agent_atomic_fsub_ret_f32__offset12b_pos(ptr %ptr, float %val ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_ret_f32__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB1_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_sub_f32_e32 v4, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB1_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_ret_f32__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB1_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB1_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_ret_f32__offset12b_pos: ; GFX11: ; %bb.0: @@ -437,35 +437,35 @@ define float @flat_agent_atomic_fsub_ret_f32__offset12b_neg(ptr %ptr, float %val ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_ret_f32__offset12b_neg: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v4 -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v5, vcc -; GFX940-NEXT: flat_load_dword v0, v[0:1] -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB2_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_sub_f32_e32 v0, v1, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v0, v[4:5], v[0:1] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB2_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_ret_f32__offset12b_neg: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v4 +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v5, vcc +; GFX942-NEXT: flat_load_dword v0, v[0:1] +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB2_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_sub_f32_e32 v0, v1, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v0, v[4:5], v[0:1] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB2_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_ret_f32__offset12b_neg: ; GFX11: ; %bb.0: @@ -652,27 +652,27 @@ define void @flat_agent_atomic_fsub_noret_f32(ptr %ptr, float %val) #0 { ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_noret_f32: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB3_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_sub_f32_e32 v4, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB3_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_noret_f32: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB3_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB3_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_noret_f32: ; GFX11: ; %bb.0: @@ -837,27 +837,27 @@ define void @flat_agent_atomic_fsub_noret_f32__offset12b_pos(ptr %ptr, float %va ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_noret_f32__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v5, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB4_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_sub_f32_e32 v4, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB4_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_noret_f32__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v5, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB4_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB4_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_noret_f32__offset12b_pos: ; GFX11: ; %bb.0: @@ -1029,33 +1029,33 @@ define void @flat_agent_atomic_fsub_noret_f32__offset12b_neg(ptr %ptr, float %va ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_noret_f32__offset12b_neg: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_co_u32_e32 v4, vcc, 0xfffff800, v0 -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc -; GFX940-NEXT: flat_load_dword v5, v[4:5] -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB5_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_sub_f32_e32 v4, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB5_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_noret_f32__offset12b_neg: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_co_u32_e32 v4, vcc, 0xfffff800, v0 +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc +; GFX942-NEXT: flat_load_dword v5, v[4:5] +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB5_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB5_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_noret_f32__offset12b_neg: ; GFX11: ; %bb.0: @@ -1242,28 +1242,28 @@ define float @flat_system_atomic_fsub_ret_f32__offset12b_pos(ptr %ptr, float %va ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fsub_ret_f32__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB6_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_sub_f32_e32 v4, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB6_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fsub_ret_f32__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB6_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB6_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fsub_ret_f32__offset12b_pos: ; GFX11: ; %bb.0: @@ -1442,27 +1442,27 @@ define void @flat_system_atomic_fsub_noret_f32__offset12b_pos(ptr %ptr, float %v ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fsub_noret_f32__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v5, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB7_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_sub_f32_e32 v4, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB7_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fsub_noret_f32__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v5, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB7_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB7_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fsub_noret_f32__offset12b_pos: ; GFX11: ; %bb.0: @@ -1642,28 +1642,28 @@ define float @flat_agent_atomic_fsub_ret_f32__ftz(ptr %ptr, float %val) #1 { ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_ret_f32__ftz: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB8_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_sub_f32_e32 v4, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB8_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_ret_f32__ftz: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB8_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB8_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_ret_f32__ftz: ; GFX11: ; %bb.0: @@ -1837,28 +1837,28 @@ define float @flat_agent_atomic_fsub_ret_f32__offset12b_pos__ftz(ptr %ptr, float ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_ret_f32__offset12b_pos__ftz: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB9_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_sub_f32_e32 v4, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB9_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_ret_f32__offset12b_pos__ftz: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB9_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB9_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_ret_f32__offset12b_pos__ftz: ; GFX11: ; %bb.0: @@ -2036,35 +2036,35 @@ define float @flat_agent_atomic_fsub_ret_f32__offset12b_neg__ftz(ptr %ptr, float ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_ret_f32__offset12b_neg__ftz: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v4 -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v5, vcc -; GFX940-NEXT: flat_load_dword v0, v[0:1] -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB10_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_sub_f32_e32 v0, v1, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v0, v[4:5], v[0:1] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB10_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_ret_f32__offset12b_neg__ftz: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v4 +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v5, vcc +; GFX942-NEXT: flat_load_dword v0, v[0:1] +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB10_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_sub_f32_e32 v0, v1, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v0, v[4:5], v[0:1] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB10_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_ret_f32__offset12b_neg__ftz: ; GFX11: ; %bb.0: @@ -2251,27 +2251,27 @@ define void @flat_agent_atomic_fsub_noret_f32__ftz(ptr %ptr, float %val) #1 { ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_noret_f32__ftz: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB11_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_sub_f32_e32 v4, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB11_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_noret_f32__ftz: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB11_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB11_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_noret_f32__ftz: ; GFX11: ; %bb.0: @@ -2436,27 +2436,27 @@ define void @flat_agent_atomic_fsub_noret_f32__offset12b_pos__ftz(ptr %ptr, floa ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_noret_f32__offset12b_pos__ftz: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v5, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB12_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_sub_f32_e32 v4, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB12_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_noret_f32__offset12b_pos__ftz: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v5, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB12_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB12_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_noret_f32__offset12b_pos__ftz: ; GFX11: ; %bb.0: @@ -2628,33 +2628,33 @@ define void @flat_agent_atomic_fsub_noret_f32__offset12b_neg__ftz(ptr %ptr, floa ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_noret_f32__offset12b_neg__ftz: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_co_u32_e32 v4, vcc, 0xfffff800, v0 -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc -; GFX940-NEXT: flat_load_dword v5, v[4:5] -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB13_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_sub_f32_e32 v4, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB13_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_noret_f32__offset12b_neg__ftz: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_co_u32_e32 v4, vcc, 0xfffff800, v0 +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc +; GFX942-NEXT: flat_load_dword v5, v[4:5] +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB13_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB13_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_noret_f32__offset12b_neg__ftz: ; GFX11: ; %bb.0: @@ -2841,28 +2841,28 @@ define float @flat_system_atomic_fsub_ret_f32__offset12b_pos__ftz(ptr %ptr, floa ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fsub_ret_f32__offset12b_pos__ftz: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB14_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_sub_f32_e32 v4, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB14_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fsub_ret_f32__offset12b_pos__ftz: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB14_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB14_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fsub_ret_f32__offset12b_pos__ftz: ; GFX11: ; %bb.0: @@ -3041,27 +3041,27 @@ define void @flat_system_atomic_fsub_noret_f32__offset12b_pos__ftz(ptr %ptr, flo ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fsub_noret_f32__offset12b_pos__ftz: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v5, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB15_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_sub_f32_e32 v4, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB15_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fsub_noret_f32__offset12b_pos__ftz: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v5, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB15_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB15_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fsub_noret_f32__offset12b_pos__ftz: ; GFX11: ; %bb.0: @@ -3266,52 +3266,52 @@ define double @flat_agent_atomic_fsub_ret_f64(ptr %ptr, double %val) #0 { ; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_ret_f64: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], src_private_base -; GFX940-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1 -; GFX940-NEXT: ; implicit-def: $vgpr4_vgpr5 -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX940-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX940-NEXT: s_cbranch_execz .LBB16_4 -; GFX940-NEXT: ; %bb.1: ; %atomicrmw.global -; GFX940-NEXT: flat_load_dwordx2 v[4:5], v[0:1] -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: .LBB16_2: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[6:7], v[4:5] -; GFX940-NEXT: v_add_f64 v[4:5], v[6:7], -v[2:3] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB16_2 -; GFX940-NEXT: ; %bb.3: ; %Flow -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GFX940-NEXT: ; implicit-def: $vgpr2_vgpr3 -; GFX940-NEXT: .LBB16_4: ; %Flow3 -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execz .LBB16_6 -; GFX940-NEXT: ; %bb.5: ; %atomicrmw.private -; GFX940-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v6, -1, v0, vcc -; GFX940-NEXT: scratch_load_dwordx2 v[4:5], v6, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_add_f64 v[0:1], v[4:5], -v[2:3] -; GFX940-NEXT: scratch_store_dwordx2 v6, v[0:1], off sc0 sc1 -; GFX940-NEXT: .LBB16_6: ; %atomicrmw.phi -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_ret_f64: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base +; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1 +; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5 +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX942-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX942-NEXT: s_cbranch_execz .LBB16_4 +; GFX942-NEXT: ; %bb.1: ; %atomicrmw.global +; GFX942-NEXT: flat_load_dwordx2 v[4:5], v[0:1] +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: .LBB16_2: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[4:5] +; GFX942-NEXT: v_add_f64 v[4:5], v[6:7], -v[2:3] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB16_2 +; GFX942-NEXT: ; %bb.3: ; %Flow +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GFX942-NEXT: .LBB16_4: ; %Flow3 +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execz .LBB16_6 +; GFX942-NEXT: ; %bb.5: ; %atomicrmw.private +; GFX942-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v6, -1, v0, vcc +; GFX942-NEXT: scratch_load_dwordx2 v[4:5], v6, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_add_f64 v[0:1], v[4:5], -v[2:3] +; GFX942-NEXT: scratch_store_dwordx2 v6, v[0:1], off +; GFX942-NEXT: .LBB16_6: ; %atomicrmw.phi +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_ret_f64: ; GFX11: ; %bb.0: @@ -3682,56 +3682,56 @@ define double @flat_agent_atomic_fsub_ret_f64__offset12b_pos(ptr %ptr, double %v ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_ret_f64__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7f8 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], src_private_base -; GFX940-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5 -; GFX940-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX940-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB17_3 -; GFX940-NEXT: ; %bb.1: ; %Flow3 -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB17_6 -; GFX940-NEXT: .LBB17_2: ; %atomicrmw.phi -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] -; GFX940-NEXT: .LBB17_3: ; %atomicrmw.global -; GFX940-NEXT: flat_load_dwordx2 v[0:1], v[4:5] -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: .LBB17_4: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[8:9], v[0:1] -; GFX940-NEXT: v_add_f64 v[6:7], v[8:9], -v[2:3] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB17_4 -; GFX940-NEXT: ; %bb.5: ; %Flow -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: ; implicit-def: $vgpr4_vgpr5 -; GFX940-NEXT: ; implicit-def: $vgpr2_vgpr3 -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execz .LBB17_2 -; GFX940-NEXT: .LBB17_6: ; %atomicrmw.private -; GFX940-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v4, -1, v4, vcc -; GFX940-NEXT: scratch_load_dwordx2 v[0:1], v4, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_add_f64 v[2:3], v[0:1], -v[2:3] -; GFX940-NEXT: scratch_store_dwordx2 v4, v[2:3], off sc0 sc1 -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_ret_f64__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7f8 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base +; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5 +; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX942-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB17_3 +; GFX942-NEXT: ; %bb.1: ; %Flow3 +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB17_6 +; GFX942-NEXT: .LBB17_2: ; %atomicrmw.phi +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] +; GFX942-NEXT: .LBB17_3: ; %atomicrmw.global +; GFX942-NEXT: flat_load_dwordx2 v[0:1], v[4:5] +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: .LBB17_4: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[0:1] +; GFX942-NEXT: v_add_f64 v[6:7], v[8:9], -v[2:3] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB17_4 +; GFX942-NEXT: ; %bb.5: ; %Flow +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5 +; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execz .LBB17_2 +; GFX942-NEXT: .LBB17_6: ; %atomicrmw.private +; GFX942-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v4, -1, v4, vcc +; GFX942-NEXT: scratch_load_dwordx2 v[0:1], v4, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_add_f64 v[2:3], v[0:1], -v[2:3] +; GFX942-NEXT: scratch_store_dwordx2 v4, v[2:3], off +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_ret_f64__offset12b_pos: ; GFX11: ; %bb.0: @@ -4129,57 +4129,57 @@ define double @flat_agent_atomic_fsub_ret_f64__offset12b_neg(ptr %ptr, double %v ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_ret_f64__offset12b_neg: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], src_private_base -; GFX940-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5 -; GFX940-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX940-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB18_3 -; GFX940-NEXT: ; %bb.1: ; %Flow3 -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB18_6 -; GFX940-NEXT: .LBB18_2: ; %atomicrmw.phi -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] -; GFX940-NEXT: .LBB18_3: ; %atomicrmw.global -; GFX940-NEXT: flat_load_dwordx2 v[0:1], v[4:5] -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: .LBB18_4: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[8:9], v[0:1] -; GFX940-NEXT: v_add_f64 v[6:7], v[8:9], -v[2:3] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB18_4 -; GFX940-NEXT: ; %bb.5: ; %Flow -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: ; implicit-def: $vgpr4_vgpr5 -; GFX940-NEXT: ; implicit-def: $vgpr2_vgpr3 -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execz .LBB18_2 -; GFX940-NEXT: .LBB18_6: ; %atomicrmw.private -; GFX940-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v4, -1, v4, vcc -; GFX940-NEXT: scratch_load_dwordx2 v[0:1], v4, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_add_f64 v[2:3], v[0:1], -v[2:3] -; GFX940-NEXT: scratch_store_dwordx2 v4, v[2:3], off sc0 sc1 -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_ret_f64__offset12b_neg: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base +; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5 +; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX942-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB18_3 +; GFX942-NEXT: ; %bb.1: ; %Flow3 +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB18_6 +; GFX942-NEXT: .LBB18_2: ; %atomicrmw.phi +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] +; GFX942-NEXT: .LBB18_3: ; %atomicrmw.global +; GFX942-NEXT: flat_load_dwordx2 v[0:1], v[4:5] +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: .LBB18_4: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[0:1] +; GFX942-NEXT: v_add_f64 v[6:7], v[8:9], -v[2:3] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB18_4 +; GFX942-NEXT: ; %bb.5: ; %Flow +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: ; implicit-def: $vgpr4_vgpr5 +; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execz .LBB18_2 +; GFX942-NEXT: .LBB18_6: ; %atomicrmw.private +; GFX942-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v4, -1, v4, vcc +; GFX942-NEXT: scratch_load_dwordx2 v[0:1], v4, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_add_f64 v[2:3], v[0:1], -v[2:3] +; GFX942-NEXT: scratch_store_dwordx2 v4, v[2:3], off +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_ret_f64__offset12b_neg: ; GFX11: ; %bb.0: @@ -4570,53 +4570,53 @@ define void @flat_agent_atomic_fsub_noret_f64(ptr %ptr, double %val) #0 { ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_noret_f64: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], src_private_base -; GFX940-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1 -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX940-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB19_3 -; GFX940-NEXT: ; %bb.1: ; %Flow3 -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB19_6 -; GFX940-NEXT: .LBB19_2: ; %atomicrmw.phi -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] -; GFX940-NEXT: .LBB19_3: ; %atomicrmw.global -; GFX940-NEXT: flat_load_dwordx2 v[6:7], v[0:1] -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: .LBB19_4: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_f64 v[4:5], v[6:7], -v[2:3] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b64_e32 v[6:7], v[4:5] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB19_4 -; GFX940-NEXT: ; %bb.5: ; %Flow -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GFX940-NEXT: ; implicit-def: $vgpr2_vgpr3 -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execz .LBB19_2 -; GFX940-NEXT: .LBB19_6: ; %atomicrmw.private -; GFX940-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc -; GFX940-NEXT: scratch_load_dwordx2 v[0:1], v4, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3] -; GFX940-NEXT: scratch_store_dwordx2 v4, v[0:1], off sc0 sc1 -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_noret_f64: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base +; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1 +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX942-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB19_3 +; GFX942-NEXT: ; %bb.1: ; %Flow3 +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB19_6 +; GFX942-NEXT: .LBB19_2: ; %atomicrmw.phi +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] +; GFX942-NEXT: .LBB19_3: ; %atomicrmw.global +; GFX942-NEXT: flat_load_dwordx2 v[6:7], v[0:1] +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: .LBB19_4: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_f64 v[4:5], v[6:7], -v[2:3] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[4:5] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB19_4 +; GFX942-NEXT: ; %bb.5: ; %Flow +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execz .LBB19_2 +; GFX942-NEXT: .LBB19_6: ; %atomicrmw.private +; GFX942-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc +; GFX942-NEXT: scratch_load_dwordx2 v[0:1], v4, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3] +; GFX942-NEXT: scratch_store_dwordx2 v4, v[0:1], off +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_noret_f64: ; GFX11: ; %bb.0: @@ -4991,55 +4991,55 @@ define void @flat_agent_atomic_fsub_noret_f64__offset12b_pos(ptr %ptr, double %v ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_noret_f64__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7f8 -; GFX940-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], src_private_base -; GFX940-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1 -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX940-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB20_3 -; GFX940-NEXT: ; %bb.1: ; %Flow3 -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB20_6 -; GFX940-NEXT: .LBB20_2: ; %atomicrmw.phi -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] -; GFX940-NEXT: .LBB20_3: ; %atomicrmw.global -; GFX940-NEXT: flat_load_dwordx2 v[6:7], v[0:1] -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: .LBB20_4: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_f64 v[4:5], v[6:7], -v[2:3] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b64_e32 v[6:7], v[4:5] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB20_4 -; GFX940-NEXT: ; %bb.5: ; %Flow -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GFX940-NEXT: ; implicit-def: $vgpr2_vgpr3 -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execz .LBB20_2 -; GFX940-NEXT: .LBB20_6: ; %atomicrmw.private -; GFX940-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc -; GFX940-NEXT: scratch_load_dwordx2 v[0:1], v4, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3] -; GFX940-NEXT: scratch_store_dwordx2 v4, v[0:1], off sc0 sc1 -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_noret_f64__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7f8 +; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base +; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1 +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX942-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB20_3 +; GFX942-NEXT: ; %bb.1: ; %Flow3 +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB20_6 +; GFX942-NEXT: .LBB20_2: ; %atomicrmw.phi +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] +; GFX942-NEXT: .LBB20_3: ; %atomicrmw.global +; GFX942-NEXT: flat_load_dwordx2 v[6:7], v[0:1] +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: .LBB20_4: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_f64 v[4:5], v[6:7], -v[2:3] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[4:5] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB20_4 +; GFX942-NEXT: ; %bb.5: ; %Flow +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execz .LBB20_2 +; GFX942-NEXT: .LBB20_6: ; %atomicrmw.private +; GFX942-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc +; GFX942-NEXT: scratch_load_dwordx2 v[0:1], v4, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3] +; GFX942-NEXT: scratch_store_dwordx2 v4, v[0:1], off +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_noret_f64__offset12b_pos: ; GFX11: ; %bb.0: @@ -5428,56 +5428,56 @@ define void @flat_agent_atomic_fsub_noret_f64__offset12b_neg(ptr %ptr, double %v ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_noret_f64__offset12b_neg: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], src_private_base -; GFX940-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1 -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX940-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB21_3 -; GFX940-NEXT: ; %bb.1: ; %Flow3 -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB21_6 -; GFX940-NEXT: .LBB21_2: ; %atomicrmw.phi -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] -; GFX940-NEXT: .LBB21_3: ; %atomicrmw.global -; GFX940-NEXT: flat_load_dwordx2 v[6:7], v[0:1] -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: .LBB21_4: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_f64 v[4:5], v[6:7], -v[2:3] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b64_e32 v[6:7], v[4:5] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB21_4 -; GFX940-NEXT: ; %bb.5: ; %Flow -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GFX940-NEXT: ; implicit-def: $vgpr2_vgpr3 -; GFX940-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_cbranch_execz .LBB21_2 -; GFX940-NEXT: .LBB21_6: ; %atomicrmw.private -; GFX940-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc -; GFX940-NEXT: scratch_load_dwordx2 v[0:1], v4, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3] -; GFX940-NEXT: scratch_store_dwordx2 v4, v[0:1], off sc0 sc1 -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_noret_f64__offset12b_neg: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], src_private_base +; GFX942-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1 +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX942-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB21_3 +; GFX942-NEXT: ; %bb.1: ; %Flow3 +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB21_6 +; GFX942-NEXT: .LBB21_2: ; %atomicrmw.phi +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] +; GFX942-NEXT: .LBB21_3: ; %atomicrmw.global +; GFX942-NEXT: flat_load_dwordx2 v[6:7], v[0:1] +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: .LBB21_4: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_f64 v[4:5], v[6:7], -v[2:3] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[4:5] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB21_4 +; GFX942-NEXT: ; %bb.5: ; %Flow +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GFX942-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_cbranch_execz .LBB21_2 +; GFX942-NEXT: .LBB21_6: ; %atomicrmw.private +; GFX942-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc +; GFX942-NEXT: scratch_load_dwordx2 v[0:1], v4, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3] +; GFX942-NEXT: scratch_store_dwordx2 v4, v[0:1], off +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_noret_f64__offset12b_neg: ; GFX11: ; %bb.0: @@ -5852,38 +5852,38 @@ define half @flat_agent_atomic_fsub_ret_f16(ptr %ptr, half %val) #0 { ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_ret_f16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v3 -; GFX940-NEXT: flat_load_dword v4, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB22_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v4 -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v3, v7 -; GFX940-NEXT: v_sub_f16_e32 v4, v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v3, v4 -; GFX940-NEXT: v_and_or_b32 v6, v7, v5, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v4, v[0:1], v[6:7] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB22_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_ret_f16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v3 +; GFX942-NEXT: flat_load_dword v4, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB22_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v4 +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v3, v7 +; GFX942-NEXT: v_sub_f16_e32 v4, v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4 +; GFX942-NEXT: v_and_or_b32 v6, v7, v5, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[6:7] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB22_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_ret_f16: ; GFX11: ; %bb.0: @@ -6142,40 +6142,40 @@ define half @flat_agent_atomic_fsub_ret_f16__offset12b_pos(ptr %ptr, half %val) ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_ret_f16__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: flat_load_dword v4, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v6 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB23_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v4 -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v3, v7 -; GFX940-NEXT: v_sub_f16_e32 v4, v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v3, v4 -; GFX940-NEXT: v_and_or_b32 v6, v7, v5, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v4, v[0:1], v[6:7] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB23_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_ret_f16__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: flat_load_dword v4, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v6 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB23_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v4 +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v3, v7 +; GFX942-NEXT: v_sub_f16_e32 v4, v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4 +; GFX942-NEXT: v_and_or_b32 v6, v7, v5, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[6:7] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB23_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_ret_f16__offset12b_pos: ; GFX11: ; %bb.0: @@ -6441,41 +6441,41 @@ define half @flat_agent_atomic_fsub_ret_f16__offset12b_neg(ptr %ptr, half %val) ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_ret_f16__offset12b_neg: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: flat_load_dword v4, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v6 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB24_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v4 -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v3, v7 -; GFX940-NEXT: v_sub_f16_e32 v4, v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v3, v4 -; GFX940-NEXT: v_and_or_b32 v6, v7, v5, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v4, v[0:1], v[6:7] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB24_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_ret_f16__offset12b_neg: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: flat_load_dword v4, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v6 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB24_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v4 +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v3, v7 +; GFX942-NEXT: v_sub_f16_e32 v4, v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4 +; GFX942-NEXT: v_and_or_b32 v6, v7, v5, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[6:7] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB24_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_ret_f16__offset12b_neg: ; GFX11: ; %bb.0: @@ -6737,37 +6737,37 @@ define void @flat_agent_atomic_fsub_noret_f16(ptr %ptr, half %val) #0 { ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_noret_f16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v3 -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v6, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB25_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v3, v5 -; GFX940-NEXT: v_sub_f16_e32 v4, v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v3, v4 -; GFX940-NEXT: v_and_or_b32 v4, v5, v6, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB25_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_noret_f16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v3 +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v6, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB25_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v3, v5 +; GFX942-NEXT: v_sub_f16_e32 v4, v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4 +; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB25_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_noret_f16: ; GFX11: ; %bb.0: @@ -7016,39 +7016,39 @@ define void @flat_agent_atomic_fsub_noret_f16__offset12b_pos(ptr %ptr, half %val ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_noret_f16__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v6, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB26_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v3, v5 -; GFX940-NEXT: v_sub_f16_e32 v4, v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v3, v4 -; GFX940-NEXT: v_and_or_b32 v4, v5, v6, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB26_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_noret_f16__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v6, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB26_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v3, v5 +; GFX942-NEXT: v_sub_f16_e32 v4, v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4 +; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB26_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_noret_f16__offset12b_pos: ; GFX11: ; %bb.0: @@ -7304,40 +7304,40 @@ define void @flat_agent_atomic_fsub_noret_f16__offset12b_neg(ptr %ptr, half %val ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_noret_f16__offset12b_neg: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v6, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB27_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v3, v5 -; GFX940-NEXT: v_sub_f16_e32 v4, v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v3, v4 -; GFX940-NEXT: v_and_or_b32 v4, v5, v6, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB27_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_noret_f16__offset12b_neg: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v6, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB27_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v3, v5 +; GFX942-NEXT: v_sub_f16_e32 v4, v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4 +; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB27_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_noret_f16__offset12b_neg: ; GFX11: ; %bb.0: @@ -7582,30 +7582,30 @@ define half @flat_agent_atomic_fsub_ret_f16__offset12b_pos__align4(ptr %ptr, hal ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_ret_f16__offset12b_pos__align4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2046 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_mov_b32 s2, 0xffff0000 -; GFX940-NEXT: .LBB28_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_sub_f16_e32 v3, v5, v2 -; GFX940-NEXT: v_and_or_b32 v4, v5, s2, v3 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB28_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_ret_f16__offset12b_pos__align4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2046 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_mov_b32 s2, 0xffff0000 +; GFX942-NEXT: .LBB28_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_sub_f16_e32 v3, v5, v2 +; GFX942-NEXT: v_and_or_b32 v4, v5, s2, v3 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB28_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_ret_f16__offset12b_pos__align4: ; GFX11: ; %bb.0: @@ -7802,29 +7802,29 @@ define void @flat_agent_atomic_fsub_noret_f16__offset12b__align4_pos(ptr %ptr, h ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_noret_f16__offset12b__align4_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v5, v[0:1] offset:2046 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_mov_b32 s2, 0xffff0000 -; GFX940-NEXT: .LBB29_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_sub_f16_e32 v3, v5, v2 -; GFX940-NEXT: v_and_or_b32 v4, v5, s2, v3 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB29_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_noret_f16__offset12b__align4_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v5, v[0:1] offset:2046 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_mov_b32 s2, 0xffff0000 +; GFX942-NEXT: .LBB29_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_sub_f16_e32 v3, v5, v2 +; GFX942-NEXT: v_and_or_b32 v4, v5, s2, v3 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB29_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_noret_f16__offset12b__align4_pos: ; GFX11: ; %bb.0: @@ -8032,40 +8032,40 @@ define half @flat_system_atomic_fsub_ret_f16__offset12b_pos(ptr %ptr, half %val) ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fsub_ret_f16__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: flat_load_dword v4, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v6 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB30_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v4 -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v3, v7 -; GFX940-NEXT: v_sub_f16_e32 v4, v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v3, v4 -; GFX940-NEXT: v_and_or_b32 v6, v7, v5, v4 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v4, v[0:1], v[6:7] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB30_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fsub_ret_f16__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: flat_load_dword v4, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v6 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB30_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v4 +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v3, v7 +; GFX942-NEXT: v_sub_f16_e32 v4, v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4 +; GFX942-NEXT: v_and_or_b32 v6, v7, v5, v4 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[6:7] sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB30_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fsub_ret_f16__offset12b_pos: ; GFX11: ; %bb.0: @@ -8332,39 +8332,39 @@ define void @flat_system_atomic_fsub_noret_f16__offset12b_pos(ptr %ptr, half %va ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fsub_noret_f16__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v6, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB31_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v3, v5 -; GFX940-NEXT: v_sub_f16_e32 v4, v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v3, v4 -; GFX940-NEXT: v_and_or_b32 v4, v5, v6, v4 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB31_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fsub_noret_f16__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v6, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB31_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v3, v5 +; GFX942-NEXT: v_sub_f16_e32 v4, v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4 +; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB31_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fsub_noret_f16__offset12b_pos: ; GFX11: ; %bb.0: @@ -8635,47 +8635,47 @@ define bfloat @flat_agent_atomic_fsub_ret_bf16(ptr %ptr, bfloat %val) #0 { ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_ret_bf16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v3 -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB32_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_sub_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v5, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB32_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_ret_bf16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v3 +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB32_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_sub_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v5, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB32_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_ret_bf16: ; GFX11: ; %bb.0: @@ -8980,49 +8980,49 @@ define bfloat @flat_agent_atomic_fsub_ret_bf16__offset12b_pos(ptr %ptr, bfloat % ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_ret_bf16__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB33_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_sub_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v5, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB33_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_ret_bf16__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB33_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_sub_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v5, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB33_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_ret_bf16__offset12b_pos: ; GFX11: ; %bb.0: @@ -9335,50 +9335,50 @@ define bfloat @flat_agent_atomic_fsub_ret_bf16__offset12b_neg(ptr %ptr, bfloat % ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_ret_bf16__offset12b_neg: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB34_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_sub_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v5, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB34_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_ret_bf16__offset12b_neg: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB34_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_sub_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v5, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB34_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_ret_bf16__offset12b_neg: ; GFX11: ; %bb.0: @@ -9686,46 +9686,46 @@ define void @flat_agent_atomic_fsub_noret_bf16(ptr %ptr, bfloat %val) #0 { ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_noret_bf16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v3 -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v6, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB35_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v4, v3, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_sub_f32_e32 v4, v4, v2 -; GFX940-NEXT: v_bfe_u32 v7, v4, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v4 -; GFX940-NEXT: v_add3_u32 v7, v7, v4, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v4, v7, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v4, v5, v6, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB35_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_noret_bf16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v3 +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v6, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB35_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v4, v3, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_sub_f32_e32 v4, v4, v2 +; GFX942-NEXT: v_bfe_u32 v7, v4, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v4 +; GFX942-NEXT: v_add3_u32 v7, v7, v4, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v4, v7, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v4, v[0:1], v[4:5] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB35_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_noret_bf16: ; GFX11: ; %bb.0: @@ -10020,48 +10020,48 @@ define void @flat_agent_atomic_fsub_noret_bf16__offset12b_pos(ptr %ptr, bfloat % ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_noret_bf16__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v4, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v6, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB36_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_sub_f32_e32 v2, v2, v6 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v2, v3, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB36_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_noret_bf16__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v4, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v6, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB36_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_sub_f32_e32 v2, v2, v6 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB36_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_noret_bf16__offset12b_pos: ; GFX11: ; %bb.0: @@ -10364,49 +10364,49 @@ define void @flat_agent_atomic_fsub_noret_bf16__offset12b_neg(ptr %ptr, bfloat % ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_noret_bf16__offset12b_neg: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v4, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v6, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB37_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_sub_f32_e32 v2, v2, v6 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v2, v3, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB37_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_noret_bf16__offset12b_neg: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v4, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v6, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB37_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_sub_f32_e32 v2, v2, v6 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB37_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_noret_bf16__offset12b_neg: ; GFX11: ; %bb.0: @@ -10698,40 +10698,40 @@ define bfloat @flat_agent_atomic_fsub_ret_bf16__offset12b_pos__align4(ptr %ptr, ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_ret_bf16__offset12b_pos__align4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2046 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: s_mov_b32 s3, 0xffff0000 -; GFX940-NEXT: .LBB38_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v5 -; GFX940-NEXT: v_sub_f32_e32 v3, v3, v2 -; GFX940-NEXT: v_bfe_u32 v4, v3, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v3 -; GFX940-NEXT: v_add3_u32 v4, v4, v3, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v6, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3 -; GFX940-NEXT: v_and_or_b32 v4, v5, s3, v3 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB38_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_ret_bf16__offset12b_pos__align4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2046 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: s_mov_b32 s3, 0xffff0000 +; GFX942-NEXT: .LBB38_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v5 +; GFX942-NEXT: v_sub_f32_e32 v3, v3, v2 +; GFX942-NEXT: v_bfe_u32 v4, v3, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v3 +; GFX942-NEXT: v_add3_u32 v4, v4, v3, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v4, v6, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX942-NEXT: v_and_or_b32 v4, v5, s3, v3 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2046 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB38_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_ret_bf16__offset12b_pos__align4: ; GFX11: ; %bb.0: @@ -10981,39 +10981,39 @@ define void @flat_agent_atomic_fsub_noret_bf16__offset12b__align4_pos(ptr %ptr, ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_noret_bf16__offset12b__align4_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2046 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: s_mov_b32 s3, 0xffff0000 -; GFX940-NEXT: .LBB39_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v3 -; GFX940-NEXT: v_sub_f32_e32 v2, v2, v4 -; GFX940-NEXT: v_bfe_u32 v5, v2, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v2 -; GFX940-NEXT: v_add3_u32 v5, v5, v2, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_and_or_b32 v2, v3, s3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2046 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB39_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_noret_bf16__offset12b__align4_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2046 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: s_mov_b32 s3, 0xffff0000 +; GFX942-NEXT: .LBB39_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; GFX942-NEXT: v_sub_f32_e32 v2, v2, v4 +; GFX942-NEXT: v_bfe_u32 v5, v2, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v2 +; GFX942-NEXT: v_add3_u32 v5, v5, v2, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_and_or_b32 v2, v3, s3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2046 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB39_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_noret_bf16__offset12b__align4_pos: ; GFX11: ; %bb.0: @@ -11274,49 +11274,49 @@ define bfloat @flat_system_atomic_fsub_ret_bf16__offset12b_pos(ptr %ptr, bfloat ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fsub_ret_bf16__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB40_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_sub_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v5, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB40_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fsub_ret_bf16__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB40_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_sub_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v5, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v5, v[0:1], v[6:7] sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB40_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fsub_ret_bf16__offset12b_pos: ; GFX11: ; %bb.0: @@ -11630,48 +11630,48 @@ define void @flat_system_atomic_fsub_noret_bf16__offset12b_pos(ptr %ptr, bfloat ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fsub_noret_bf16__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v4, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v6, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB41_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_sub_f32_e32 v2, v2, v6 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v2, v3, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB41_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fsub_noret_bf16__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v4, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v6, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB41_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_sub_f32_e32 v2, v2, v6 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB41_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fsub_noret_bf16__offset12b_pos: ; GFX11: ; %bb.0: @@ -11956,28 +11956,28 @@ define <2 x half> @flat_agent_atomic_fsub_ret_v2f16(ptr %ptr, <2 x half> %val) # ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_ret_v2f16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB42_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB42_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_ret_v2f16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB42_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB42_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_ret_v2f16: ; GFX11: ; %bb.0: @@ -12176,28 +12176,28 @@ define <2 x half> @flat_agent_atomic_fsub_ret_v2f16__offset12b_pos(ptr %ptr, <2 ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_ret_v2f16__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB43_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB43_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_ret_v2f16__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB43_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB43_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_ret_v2f16__offset12b_pos: ; GFX11: ; %bb.0: @@ -12399,35 +12399,35 @@ define <2 x half> @flat_agent_atomic_fsub_ret_v2f16__offset12b_neg(ptr %ptr, <2 ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_ret_v2f16__offset12b_neg: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v4 -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v5, vcc -; GFX940-NEXT: flat_load_dword v0, v[0:1] -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB44_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_pk_add_f16 v0, v1, v2 neg_lo:[0,1] neg_hi:[0,1] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v0, v[4:5], v[0:1] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB44_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_ret_v2f16__offset12b_neg: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v4 +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v5, vcc +; GFX942-NEXT: flat_load_dword v0, v[0:1] +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB44_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_pk_add_f16 v0, v1, v2 neg_lo:[0,1] neg_hi:[0,1] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v0, v[4:5], v[0:1] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB44_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_ret_v2f16__offset12b_neg: ; GFX11: ; %bb.0: @@ -12638,27 +12638,27 @@ define void @flat_agent_atomic_fsub_noret_v2f16(ptr %ptr, <2 x half> %val) #0 { ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_noret_v2f16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v5, v[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB45_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB45_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_noret_v2f16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v5, v[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB45_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB45_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_noret_v2f16: ; GFX11: ; %bb.0: @@ -12847,27 +12847,27 @@ define void @flat_agent_atomic_fsub_noret_v2f16__offset12b_pos(ptr %ptr, <2 x ha ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_noret_v2f16__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v5, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB46_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB46_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_noret_v2f16__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v5, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB46_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB46_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_noret_v2f16__offset12b_pos: ; GFX11: ; %bb.0: @@ -13063,33 +13063,33 @@ define void @flat_agent_atomic_fsub_noret_v2f16__offset12b_neg(ptr %ptr, <2 x ha ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_noret_v2f16__offset12b_neg: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_co_u32_e32 v4, vcc, 0xfffff800, v0 -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc -; GFX940-NEXT: flat_load_dword v5, v[4:5] -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB47_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB47_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_noret_v2f16__offset12b_neg: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_co_u32_e32 v4, vcc, 0xfffff800, v0 +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc +; GFX942-NEXT: flat_load_dword v5, v[4:5] +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB47_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB47_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_noret_v2f16__offset12b_neg: ; GFX11: ; %bb.0: @@ -13300,28 +13300,28 @@ define <2 x half> @flat_system_atomic_fsub_ret_v2f16__offset12b_pos(ptr %ptr, <2 ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fsub_ret_v2f16__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB48_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1] -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB48_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fsub_ret_v2f16__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB48_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1] +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB48_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fsub_ret_v2f16__offset12b_pos: ; GFX11: ; %bb.0: @@ -13524,27 +13524,27 @@ define void @flat_system_atomic_fsub_noret_v2f16__offset12b_pos(ptr %ptr, <2 x h ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fsub_noret_v2f16__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v5, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB49_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1] -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB49_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fsub_noret_v2f16__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v5, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB49_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1] +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB49_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fsub_noret_v2f16__offset12b_pos: ; GFX11: ; %bb.0: @@ -13769,47 +13769,47 @@ define <2 x bfloat> @flat_agent_atomic_fsub_ret_v2bf16(ptr %ptr, <2 x bfloat> %v ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_ret_v2bf16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB50_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v7 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 -; GFX940-NEXT: v_sub_f32_e32 v3, v3, v4 -; GFX940-NEXT: v_sub_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v3, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v3 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v3, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v5, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v6, v5, v3, s5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[6:7] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB50_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_ret_v2bf16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB50_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v7 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 +; GFX942-NEXT: v_sub_f32_e32 v3, v3, v4 +; GFX942-NEXT: v_sub_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v3, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v3 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v3, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v5, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v6, v5, v3, s5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[6:7] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB50_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_ret_v2bf16: ; GFX11: ; %bb.0: @@ -14115,47 +14115,47 @@ define <2 x bfloat> @flat_agent_atomic_fsub_ret_v2bf16__offset12b_pos(ptr %ptr, ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_ret_v2bf16__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB51_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v7 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 -; GFX940-NEXT: v_sub_f32_e32 v3, v3, v4 -; GFX940-NEXT: v_sub_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v3, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v3 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v3, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v5, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v6, v5, v3, s5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[6:7] offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB51_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_ret_v2bf16__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB51_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v7 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 +; GFX942-NEXT: v_sub_f32_e32 v3, v3, v4 +; GFX942-NEXT: v_sub_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v3, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v3 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v3, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v5, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v6, v5, v3, s5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[6:7] offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB51_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_ret_v2bf16__offset12b_pos: ; GFX11: ; %bb.0: @@ -14464,54 +14464,54 @@ define <2 x bfloat> @flat_agent_atomic_fsub_ret_v2bf16__offset12b_neg(ptr %ptr, ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_ret_v2bf16__offset12b_neg: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v4 -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v5, vcc -; GFX940-NEXT: flat_load_dword v0, v[0:1] -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB52_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v7 -; GFX940-NEXT: v_and_b32_e32 v3, 0xffff0000, v7 -; GFX940-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX940-NEXT: v_sub_f32_e32 v3, v3, v2 -; GFX940-NEXT: v_bfe_u32 v6, v0, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v3, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v0 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v3 -; GFX940-NEXT: v_add3_u32 v6, v6, v0, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v3, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v0, v0 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v0, v6, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v6, v3, v0, s5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v0, v[4:5], v[6:7] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v0, v7 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB52_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_ret_v2bf16__offset12b_neg: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v4 +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v5, vcc +; GFX942-NEXT: flat_load_dword v0, v[0:1] +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[4:5], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB52_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v7 +; GFX942-NEXT: v_and_b32_e32 v3, 0xffff0000, v7 +; GFX942-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX942-NEXT: v_sub_f32_e32 v3, v3, v2 +; GFX942-NEXT: v_bfe_u32 v6, v0, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v3, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v0 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v3 +; GFX942-NEXT: v_add3_u32 v6, v6, v0, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v3, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v0, v0 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v0, v6, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v6, v3, v0, s5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v0, v[4:5], v[6:7] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v0, v7 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB52_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_ret_v2bf16__offset12b_neg: ; GFX11: ; %bb.0: @@ -14830,46 +14830,46 @@ define void @flat_agent_atomic_fsub_noret_v2bf16(ptr %ptr, <2 x bfloat> %val) #0 ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_noret_v2bf16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB53_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v3 -; GFX940-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 -; GFX940-NEXT: v_sub_f32_e32 v2, v2, v4 -; GFX940-NEXT: v_sub_f32_e32 v6, v6, v5 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v6, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v6 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v6, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v2, v6, v2, s5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB53_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_noret_v2bf16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB53_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; GFX942-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 +; GFX942-NEXT: v_sub_f32_e32 v2, v2, v4 +; GFX942-NEXT: v_sub_f32_e32 v6, v6, v5 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v6, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v6 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v6, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB53_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_noret_v2bf16: ; GFX11: ; %bb.0: @@ -15166,46 +15166,46 @@ define void @flat_agent_atomic_fsub_noret_v2bf16__offset12b_pos(ptr %ptr, <2 x b ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_noret_v2bf16__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB54_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v3 -; GFX940-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 -; GFX940-NEXT: v_sub_f32_e32 v2, v2, v4 -; GFX940-NEXT: v_sub_f32_e32 v6, v6, v5 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v6, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v6 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v6, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v2, v6, v2, s5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB54_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_noret_v2bf16__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB54_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; GFX942-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 +; GFX942-NEXT: v_sub_f32_e32 v2, v2, v4 +; GFX942-NEXT: v_sub_f32_e32 v6, v6, v5 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v6, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v6 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v6, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB54_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_noret_v2bf16__offset12b_pos: ; GFX11: ; %bb.0: @@ -15509,52 +15509,52 @@ define void @flat_agent_atomic_fsub_noret_v2bf16__offset12b_neg(ptr %ptr, <2 x b ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_agent_atomic_fsub_noret_v2bf16__offset12b_neg: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_co_u32_e32 v4, vcc, 0xfffff800, v0 -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc -; GFX940-NEXT: flat_load_dword v3, v[4:5] -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB55_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v3 -; GFX940-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 -; GFX940-NEXT: v_sub_f32_e32 v2, v2, v4 -; GFX940-NEXT: v_sub_f32_e32 v6, v6, v5 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v6, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v6 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v6, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v2, v6, v2, s5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB55_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_agent_atomic_fsub_noret_v2bf16__offset12b_neg: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_co_u32_e32 v4, vcc, 0xfffff800, v0 +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_addc_co_u32_e32 v5, vcc, -1, v1, vcc +; GFX942-NEXT: flat_load_dword v3, v[4:5] +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB55_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; GFX942-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 +; GFX942-NEXT: v_sub_f32_e32 v2, v2, v4 +; GFX942-NEXT: v_sub_f32_e32 v6, v6, v5 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v6, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v6 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v6, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB55_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_agent_atomic_fsub_noret_v2bf16__offset12b_neg: ; GFX11: ; %bb.0: @@ -15872,47 +15872,47 @@ define <2 x bfloat> @flat_system_atomic_fsub_ret_v2bf16__offset12b_pos(ptr %ptr, ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fsub_ret_v2bf16__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB56_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v7 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 -; GFX940-NEXT: v_sub_f32_e32 v3, v3, v4 -; GFX940-NEXT: v_sub_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v3, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v3 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v3, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v5, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v6, v5, v3, s5 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[6:7] offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB56_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fsub_ret_v2bf16__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB56_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v7 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 +; GFX942-NEXT: v_sub_f32_e32 v3, v3, v4 +; GFX942-NEXT: v_sub_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v3, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v3 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v3, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v5, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v6, v5, v3, s5 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v3, v[0:1], v[6:7] offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB56_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fsub_ret_v2bf16__offset12b_pos: ; GFX11: ; %bb.0: @@ -16223,46 +16223,46 @@ define void @flat_system_atomic_fsub_noret_v2bf16__offset12b_pos(ptr %ptr, <2 x ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_system_atomic_fsub_noret_v2bf16__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:2044 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB57_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v3 -; GFX940-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 -; GFX940-NEXT: v_sub_f32_e32 v2, v2, v4 -; GFX940-NEXT: v_sub_f32_e32 v6, v6, v5 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v6, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v6 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v6, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v2, v6, v2, s5 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB57_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_system_atomic_fsub_noret_v2bf16__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: flat_load_dword v3, v[0:1] offset:2044 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB57_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; GFX942-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 +; GFX942-NEXT: v_sub_f32_e32 v2, v2, v4 +; GFX942-NEXT: v_sub_f32_e32 v6, v6, v5 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v6, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v6 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v6, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB57_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: flat_system_atomic_fsub_noret_v2bf16__offset12b_pos: ; GFX11: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll index ef3657433e8b7..346b69c362c04 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx940 < %s | FileCheck %s -check-prefixes=GFX940-SDAG -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx940 < %s | FileCheck %s -check-prefixes=GFX940-GISEL +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck %s -check-prefixes=GFX942-SDAG +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck %s -check-prefixes=GFX942-GISEL ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GFX11-SDAG ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GFX11-GISEL ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s -check-prefixes=GFX12-SDAG @@ -12,46 +12,46 @@ declare i32 @llvm.amdgcn.workitem.id.x() define amdgpu_kernel void @soff1_voff1(i32 %soff) { -; GFX940-SDAG-LABEL: soff1_voff1: -; GFX940-SDAG: ; %bb.0: ; %bb -; GFX940-SDAG-NEXT: s_load_dword s0, s[4:5], 0x24 -; GFX940-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1 -; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-SDAG-NEXT: v_add_u32_e32 v0, s0, v0 -; GFX940-SDAG-NEXT: v_add_u32_e32 v2, 1, v0 -; GFX940-SDAG-NEXT: v_add_u32_e32 v3, 2, v0 -; GFX940-SDAG-NEXT: scratch_store_byte v2, v1, off sc0 sc1 -; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 2 -; GFX940-SDAG-NEXT: scratch_store_byte v3, v1, off sc0 sc1 -; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX940-SDAG-NEXT: v_add_u32_e32 v0, 4, v0 -; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4 -; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, off sc0 sc1 -; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX940-SDAG-NEXT: s_endpgm +; GFX942-SDAG-LABEL: soff1_voff1: +; GFX942-SDAG: ; %bb.0: ; %bb +; GFX942-SDAG-NEXT: s_load_dword s0, s[4:5], 0x24 +; GFX942-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 1 +; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-SDAG-NEXT: v_add_u32_e32 v0, s0, v0 +; GFX942-SDAG-NEXT: v_add_u32_e32 v2, 1, v0 +; GFX942-SDAG-NEXT: v_add_u32_e32 v3, 2, v0 +; GFX942-SDAG-NEXT: scratch_store_byte v2, v1, off sc0 sc1 +; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 2 +; GFX942-SDAG-NEXT: scratch_store_byte v3, v1, off sc0 sc1 +; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX942-SDAG-NEXT: v_add_u32_e32 v0, 4, v0 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 4 +; GFX942-SDAG-NEXT: scratch_store_byte v0, v1, off sc0 sc1 +; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX942-SDAG-NEXT: s_endpgm ; -; GFX940-GISEL-LABEL: soff1_voff1: -; GFX940-GISEL: ; %bb.0: ; %bb -; GFX940-GISEL-NEXT: s_load_dword s0, s[4:5], 0x24 -; GFX940-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 1 -; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-GISEL-NEXT: s_add_u32 s0, 0, s0 -; GFX940-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 -; GFX940-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 -; GFX940-GISEL-NEXT: v_add_u32_e32 v3, 2, v0 -; GFX940-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1 -; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2 -; GFX940-GISEL-NEXT: scratch_store_byte v3, v1, off sc0 sc1 -; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX940-GISEL-NEXT: v_add_u32_e32 v0, 4, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 -; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off sc0 sc1 -; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX940-GISEL-NEXT: s_endpgm +; GFX942-GISEL-LABEL: soff1_voff1: +; GFX942-GISEL: ; %bb.0: ; %bb +; GFX942-GISEL-NEXT: s_load_dword s0, s[4:5], 0x24 +; GFX942-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 1 +; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-GISEL-NEXT: s_add_u32 s0, 0, s0 +; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 +; GFX942-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 +; GFX942-GISEL-NEXT: v_add_u32_e32 v3, 2, v0 +; GFX942-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1 +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 2 +; GFX942-GISEL-NEXT: scratch_store_byte v3, v1, off sc0 sc1 +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: v_add_u32_e32 v0, 4, v0 +; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 4 +; GFX942-GISEL-NEXT: scratch_store_byte v0, v1, off sc0 sc1 +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: s_endpgm ; ; GFX11-SDAG-LABEL: soff1_voff1: ; GFX11-SDAG: ; %bb.0: ; %bb @@ -140,48 +140,48 @@ bb: } define amdgpu_kernel void @soff1_voff2(i32 %soff) { -; GFX940-SDAG-LABEL: soff1_voff2: -; GFX940-SDAG: ; %bb.0: ; %bb -; GFX940-SDAG-NEXT: s_load_dword s0, s[4:5], 0x24 -; GFX940-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1 -; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-SDAG-NEXT: v_lshl_add_u32 v0, v0, 1, v2 -; GFX940-SDAG-NEXT: v_add_u32_e32 v2, 1, v0 -; GFX940-SDAG-NEXT: v_add_u32_e32 v3, 2, v0 -; GFX940-SDAG-NEXT: scratch_store_byte v2, v1, off sc0 sc1 -; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 2 -; GFX940-SDAG-NEXT: scratch_store_byte v3, v1, off sc0 sc1 -; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX940-SDAG-NEXT: v_add_u32_e32 v0, 4, v0 -; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4 -; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, off sc0 sc1 -; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX940-SDAG-NEXT: s_endpgm +; GFX942-SDAG-LABEL: soff1_voff2: +; GFX942-SDAG: ; %bb.0: ; %bb +; GFX942-SDAG-NEXT: s_load_dword s0, s[4:5], 0x24 +; GFX942-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 1 +; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-SDAG-NEXT: v_lshl_add_u32 v0, v0, 1, v2 +; GFX942-SDAG-NEXT: v_add_u32_e32 v2, 1, v0 +; GFX942-SDAG-NEXT: v_add_u32_e32 v3, 2, v0 +; GFX942-SDAG-NEXT: scratch_store_byte v2, v1, off sc0 sc1 +; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 2 +; GFX942-SDAG-NEXT: scratch_store_byte v3, v1, off sc0 sc1 +; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX942-SDAG-NEXT: v_add_u32_e32 v0, 4, v0 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 4 +; GFX942-SDAG-NEXT: scratch_store_byte v0, v1, off sc0 sc1 +; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX942-SDAG-NEXT: s_endpgm ; -; GFX940-GISEL-LABEL: soff1_voff2: -; GFX940-GISEL: ; %bb.0: ; %bb -; GFX940-GISEL-NEXT: s_load_dword s0, s[4:5], 0x24 -; GFX940-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 1 -; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-GISEL-NEXT: s_add_u32 s0, 0, s0 -; GFX940-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 -; GFX940-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 -; GFX940-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1 -; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX940-GISEL-NEXT: v_add_u32_e32 v1, 2, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 2 -; GFX940-GISEL-NEXT: scratch_store_byte v1, v2, off sc0 sc1 -; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX940-GISEL-NEXT: v_add_u32_e32 v0, 4, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 -; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off sc0 sc1 -; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX940-GISEL-NEXT: s_endpgm +; GFX942-GISEL-LABEL: soff1_voff2: +; GFX942-GISEL: ; %bb.0: ; %bb +; GFX942-GISEL-NEXT: s_load_dword s0, s[4:5], 0x24 +; GFX942-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 1 +; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-GISEL-NEXT: s_add_u32 s0, 0, s0 +; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 +; GFX942-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 +; GFX942-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1 +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: v_add_u32_e32 v1, 2, v0 +; GFX942-GISEL-NEXT: v_mov_b32_e32 v2, 2 +; GFX942-GISEL-NEXT: scratch_store_byte v1, v2, off sc0 sc1 +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: v_add_u32_e32 v0, 4, v0 +; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 4 +; GFX942-GISEL-NEXT: scratch_store_byte v0, v1, off sc0 sc1 +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: s_endpgm ; ; GFX11-SDAG-LABEL: soff1_voff2: ; GFX11-SDAG: ; %bb.0: ; %bb @@ -276,48 +276,48 @@ bb: } define amdgpu_kernel void @soff1_voff4(i32 %soff) { -; GFX940-SDAG-LABEL: soff1_voff4: -; GFX940-SDAG: ; %bb.0: ; %bb -; GFX940-SDAG-NEXT: s_load_dword s0, s[4:5], 0x24 -; GFX940-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1 -; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, v2 -; GFX940-SDAG-NEXT: v_add_u32_e32 v2, 1, v0 -; GFX940-SDAG-NEXT: v_add_u32_e32 v3, 2, v0 -; GFX940-SDAG-NEXT: scratch_store_byte v2, v1, off sc0 sc1 -; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 2 -; GFX940-SDAG-NEXT: scratch_store_byte v3, v1, off sc0 sc1 -; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX940-SDAG-NEXT: v_add_u32_e32 v0, 4, v0 -; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4 -; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, off sc0 sc1 -; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX940-SDAG-NEXT: s_endpgm +; GFX942-SDAG-LABEL: soff1_voff4: +; GFX942-SDAG: ; %bb.0: ; %bb +; GFX942-SDAG-NEXT: s_load_dword s0, s[4:5], 0x24 +; GFX942-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 1 +; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, v2 +; GFX942-SDAG-NEXT: v_add_u32_e32 v2, 1, v0 +; GFX942-SDAG-NEXT: v_add_u32_e32 v3, 2, v0 +; GFX942-SDAG-NEXT: scratch_store_byte v2, v1, off sc0 sc1 +; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 2 +; GFX942-SDAG-NEXT: scratch_store_byte v3, v1, off sc0 sc1 +; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX942-SDAG-NEXT: v_add_u32_e32 v0, 4, v0 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 4 +; GFX942-SDAG-NEXT: scratch_store_byte v0, v1, off sc0 sc1 +; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX942-SDAG-NEXT: s_endpgm ; -; GFX940-GISEL-LABEL: soff1_voff4: -; GFX940-GISEL: ; %bb.0: ; %bb -; GFX940-GISEL-NEXT: s_load_dword s0, s[4:5], 0x24 -; GFX940-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 1 -; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-GISEL-NEXT: s_add_u32 s0, 0, s0 -; GFX940-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 -; GFX940-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 -; GFX940-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1 -; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX940-GISEL-NEXT: v_add_u32_e32 v1, 2, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 2 -; GFX940-GISEL-NEXT: scratch_store_byte v1, v2, off sc0 sc1 -; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX940-GISEL-NEXT: v_add_u32_e32 v0, 4, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 -; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off sc0 sc1 -; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX940-GISEL-NEXT: s_endpgm +; GFX942-GISEL-LABEL: soff1_voff4: +; GFX942-GISEL: ; %bb.0: ; %bb +; GFX942-GISEL-NEXT: s_load_dword s0, s[4:5], 0x24 +; GFX942-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 1 +; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-GISEL-NEXT: s_add_u32 s0, 0, s0 +; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 +; GFX942-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 +; GFX942-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1 +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: v_add_u32_e32 v1, 2, v0 +; GFX942-GISEL-NEXT: v_mov_b32_e32 v2, 2 +; GFX942-GISEL-NEXT: scratch_store_byte v1, v2, off sc0 sc1 +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: v_add_u32_e32 v0, 4, v0 +; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 4 +; GFX942-GISEL-NEXT: scratch_store_byte v0, v1, off sc0 sc1 +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: s_endpgm ; ; GFX11-SDAG-LABEL: soff1_voff4: ; GFX11-SDAG: ; %bb.0: ; %bb @@ -412,48 +412,48 @@ bb: } define amdgpu_kernel void @soff2_voff1(i32 %soff) { -; GFX940-SDAG-LABEL: soff2_voff1: -; GFX940-SDAG: ; %bb.0: ; %bb -; GFX940-SDAG-NEXT: s_load_dword s0, s[4:5], 0x24 -; GFX940-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1 -; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 1 -; GFX940-SDAG-NEXT: v_add_u32_e32 v0, s0, v0 -; GFX940-SDAG-NEXT: v_add_u32_e32 v2, 1, v0 -; GFX940-SDAG-NEXT: v_add_u32_e32 v3, 2, v0 -; GFX940-SDAG-NEXT: scratch_store_byte v2, v1, off sc0 sc1 -; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 2 -; GFX940-SDAG-NEXT: scratch_store_byte v3, v1, off sc0 sc1 -; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX940-SDAG-NEXT: v_add_u32_e32 v0, 4, v0 -; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4 -; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, off sc0 sc1 -; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX940-SDAG-NEXT: s_endpgm +; GFX942-SDAG-LABEL: soff2_voff1: +; GFX942-SDAG: ; %bb.0: ; %bb +; GFX942-SDAG-NEXT: s_load_dword s0, s[4:5], 0x24 +; GFX942-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 1 +; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-SDAG-NEXT: s_lshl_b32 s0, s0, 1 +; GFX942-SDAG-NEXT: v_add_u32_e32 v0, s0, v0 +; GFX942-SDAG-NEXT: v_add_u32_e32 v2, 1, v0 +; GFX942-SDAG-NEXT: v_add_u32_e32 v3, 2, v0 +; GFX942-SDAG-NEXT: scratch_store_byte v2, v1, off sc0 sc1 +; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 2 +; GFX942-SDAG-NEXT: scratch_store_byte v3, v1, off sc0 sc1 +; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX942-SDAG-NEXT: v_add_u32_e32 v0, 4, v0 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 4 +; GFX942-SDAG-NEXT: scratch_store_byte v0, v1, off sc0 sc1 +; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX942-SDAG-NEXT: s_endpgm ; -; GFX940-GISEL-LABEL: soff2_voff1: -; GFX940-GISEL: ; %bb.0: ; %bb -; GFX940-GISEL-NEXT: s_load_dword s0, s[4:5], 0x24 -; GFX940-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 1 -; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 1 -; GFX940-GISEL-NEXT: s_add_u32 s0, 0, s0 -; GFX940-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 -; GFX940-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 -; GFX940-GISEL-NEXT: v_add_u32_e32 v3, 2, v0 -; GFX940-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1 -; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2 -; GFX940-GISEL-NEXT: scratch_store_byte v3, v1, off sc0 sc1 -; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX940-GISEL-NEXT: v_add_u32_e32 v0, 4, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 -; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off sc0 sc1 -; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX940-GISEL-NEXT: s_endpgm +; GFX942-GISEL-LABEL: soff2_voff1: +; GFX942-GISEL: ; %bb.0: ; %bb +; GFX942-GISEL-NEXT: s_load_dword s0, s[4:5], 0x24 +; GFX942-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 1 +; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-GISEL-NEXT: s_lshl_b32 s0, s0, 1 +; GFX942-GISEL-NEXT: s_add_u32 s0, 0, s0 +; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 +; GFX942-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 +; GFX942-GISEL-NEXT: v_add_u32_e32 v3, 2, v0 +; GFX942-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1 +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 2 +; GFX942-GISEL-NEXT: scratch_store_byte v3, v1, off sc0 sc1 +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: v_add_u32_e32 v0, 4, v0 +; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 4 +; GFX942-GISEL-NEXT: scratch_store_byte v0, v1, off sc0 sc1 +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: s_endpgm ; ; GFX11-SDAG-LABEL: soff2_voff1: ; GFX11-SDAG: ; %bb.0: ; %bb @@ -547,49 +547,49 @@ bb: } define amdgpu_kernel void @soff2_voff2(i32 %soff) { -; GFX940-SDAG-LABEL: soff2_voff2: -; GFX940-SDAG: ; %bb.0: ; %bb -; GFX940-SDAG-NEXT: s_load_dword s0, s[4:5], 0x24 -; GFX940-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1 -; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 1 -; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-SDAG-NEXT: v_lshl_add_u32 v0, v0, 1, v2 -; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, off offset:1 sc0 sc1 -; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX940-SDAG-NEXT: v_add_u32_e32 v1, 2, v0 -; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2 -; GFX940-SDAG-NEXT: scratch_store_byte v1, v2, off sc0 sc1 -; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX940-SDAG-NEXT: v_add_u32_e32 v0, 4, v0 -; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4 -; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, off sc0 sc1 -; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX940-SDAG-NEXT: s_endpgm +; GFX942-SDAG-LABEL: soff2_voff2: +; GFX942-SDAG: ; %bb.0: ; %bb +; GFX942-SDAG-NEXT: s_load_dword s0, s[4:5], 0x24 +; GFX942-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 1 +; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-SDAG-NEXT: s_lshl_b32 s0, s0, 1 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-SDAG-NEXT: v_lshl_add_u32 v0, v0, 1, v2 +; GFX942-SDAG-NEXT: scratch_store_byte v0, v1, off offset:1 sc0 sc1 +; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX942-SDAG-NEXT: v_add_u32_e32 v1, 2, v0 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v2, 2 +; GFX942-SDAG-NEXT: scratch_store_byte v1, v2, off sc0 sc1 +; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX942-SDAG-NEXT: v_add_u32_e32 v0, 4, v0 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 4 +; GFX942-SDAG-NEXT: scratch_store_byte v0, v1, off sc0 sc1 +; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX942-SDAG-NEXT: s_endpgm ; -; GFX940-GISEL-LABEL: soff2_voff2: -; GFX940-GISEL: ; %bb.0: ; %bb -; GFX940-GISEL-NEXT: s_load_dword s0, s[4:5], 0x24 -; GFX940-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 1 -; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 1 -; GFX940-GISEL-NEXT: s_add_u32 s0, 0, s0 -; GFX940-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 -; GFX940-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 -; GFX940-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1 -; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX940-GISEL-NEXT: v_add_u32_e32 v1, 2, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 2 -; GFX940-GISEL-NEXT: scratch_store_byte v1, v2, off sc0 sc1 -; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX940-GISEL-NEXT: v_add_u32_e32 v0, 4, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 -; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off sc0 sc1 -; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX940-GISEL-NEXT: s_endpgm +; GFX942-GISEL-LABEL: soff2_voff2: +; GFX942-GISEL: ; %bb.0: ; %bb +; GFX942-GISEL-NEXT: s_load_dword s0, s[4:5], 0x24 +; GFX942-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 1 +; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-GISEL-NEXT: s_lshl_b32 s0, s0, 1 +; GFX942-GISEL-NEXT: s_add_u32 s0, 0, s0 +; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 +; GFX942-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 +; GFX942-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1 +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: v_add_u32_e32 v1, 2, v0 +; GFX942-GISEL-NEXT: v_mov_b32_e32 v2, 2 +; GFX942-GISEL-NEXT: scratch_store_byte v1, v2, off sc0 sc1 +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: v_add_u32_e32 v0, 4, v0 +; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 4 +; GFX942-GISEL-NEXT: scratch_store_byte v0, v1, off sc0 sc1 +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: s_endpgm ; ; GFX11-SDAG-LABEL: soff2_voff2: ; GFX11-SDAG: ; %bb.0: ; %bb @@ -688,49 +688,49 @@ bb: } define amdgpu_kernel void @soff2_voff4(i32 %soff) { -; GFX940-SDAG-LABEL: soff2_voff4: -; GFX940-SDAG: ; %bb.0: ; %bb -; GFX940-SDAG-NEXT: s_load_dword s0, s[4:5], 0x24 -; GFX940-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1 -; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 1 -; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, v2 -; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, off offset:1 sc0 sc1 -; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX940-SDAG-NEXT: v_add_u32_e32 v1, 2, v0 -; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2 -; GFX940-SDAG-NEXT: scratch_store_byte v1, v2, off sc0 sc1 -; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX940-SDAG-NEXT: v_add_u32_e32 v0, 4, v0 -; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4 -; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, off sc0 sc1 -; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX940-SDAG-NEXT: s_endpgm +; GFX942-SDAG-LABEL: soff2_voff4: +; GFX942-SDAG: ; %bb.0: ; %bb +; GFX942-SDAG-NEXT: s_load_dword s0, s[4:5], 0x24 +; GFX942-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 1 +; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-SDAG-NEXT: s_lshl_b32 s0, s0, 1 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, v2 +; GFX942-SDAG-NEXT: scratch_store_byte v0, v1, off offset:1 sc0 sc1 +; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX942-SDAG-NEXT: v_add_u32_e32 v1, 2, v0 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v2, 2 +; GFX942-SDAG-NEXT: scratch_store_byte v1, v2, off sc0 sc1 +; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX942-SDAG-NEXT: v_add_u32_e32 v0, 4, v0 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 4 +; GFX942-SDAG-NEXT: scratch_store_byte v0, v1, off sc0 sc1 +; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX942-SDAG-NEXT: s_endpgm ; -; GFX940-GISEL-LABEL: soff2_voff4: -; GFX940-GISEL: ; %bb.0: ; %bb -; GFX940-GISEL-NEXT: s_load_dword s0, s[4:5], 0x24 -; GFX940-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 1 -; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 1 -; GFX940-GISEL-NEXT: s_add_u32 s0, 0, s0 -; GFX940-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 -; GFX940-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 -; GFX940-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1 -; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX940-GISEL-NEXT: v_add_u32_e32 v1, 2, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 2 -; GFX940-GISEL-NEXT: scratch_store_byte v1, v2, off sc0 sc1 -; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX940-GISEL-NEXT: v_add_u32_e32 v0, 4, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 -; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off sc0 sc1 -; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX940-GISEL-NEXT: s_endpgm +; GFX942-GISEL-LABEL: soff2_voff4: +; GFX942-GISEL: ; %bb.0: ; %bb +; GFX942-GISEL-NEXT: s_load_dword s0, s[4:5], 0x24 +; GFX942-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 1 +; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-GISEL-NEXT: s_lshl_b32 s0, s0, 1 +; GFX942-GISEL-NEXT: s_add_u32 s0, 0, s0 +; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 +; GFX942-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 +; GFX942-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1 +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: v_add_u32_e32 v1, 2, v0 +; GFX942-GISEL-NEXT: v_mov_b32_e32 v2, 2 +; GFX942-GISEL-NEXT: scratch_store_byte v1, v2, off sc0 sc1 +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: v_add_u32_e32 v0, 4, v0 +; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 4 +; GFX942-GISEL-NEXT: scratch_store_byte v0, v1, off sc0 sc1 +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: s_endpgm ; ; GFX11-SDAG-LABEL: soff2_voff4: ; GFX11-SDAG: ; %bb.0: ; %bb @@ -829,48 +829,48 @@ bb: } define amdgpu_kernel void @soff4_voff1(i32 %soff) { -; GFX940-SDAG-LABEL: soff4_voff1: -; GFX940-SDAG: ; %bb.0: ; %bb -; GFX940-SDAG-NEXT: s_load_dword s0, s[4:5], 0x24 -; GFX940-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1 -; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 2 -; GFX940-SDAG-NEXT: v_add_u32_e32 v0, s0, v0 -; GFX940-SDAG-NEXT: v_add_u32_e32 v2, 1, v0 -; GFX940-SDAG-NEXT: v_add_u32_e32 v3, 2, v0 -; GFX940-SDAG-NEXT: scratch_store_byte v2, v1, off sc0 sc1 -; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 2 -; GFX940-SDAG-NEXT: scratch_store_byte v3, v1, off sc0 sc1 -; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX940-SDAG-NEXT: v_add_u32_e32 v0, 4, v0 -; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4 -; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, off sc0 sc1 -; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX940-SDAG-NEXT: s_endpgm +; GFX942-SDAG-LABEL: soff4_voff1: +; GFX942-SDAG: ; %bb.0: ; %bb +; GFX942-SDAG-NEXT: s_load_dword s0, s[4:5], 0x24 +; GFX942-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 1 +; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-SDAG-NEXT: s_lshl_b32 s0, s0, 2 +; GFX942-SDAG-NEXT: v_add_u32_e32 v0, s0, v0 +; GFX942-SDAG-NEXT: v_add_u32_e32 v2, 1, v0 +; GFX942-SDAG-NEXT: v_add_u32_e32 v3, 2, v0 +; GFX942-SDAG-NEXT: scratch_store_byte v2, v1, off sc0 sc1 +; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 2 +; GFX942-SDAG-NEXT: scratch_store_byte v3, v1, off sc0 sc1 +; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX942-SDAG-NEXT: v_add_u32_e32 v0, 4, v0 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 4 +; GFX942-SDAG-NEXT: scratch_store_byte v0, v1, off sc0 sc1 +; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX942-SDAG-NEXT: s_endpgm ; -; GFX940-GISEL-LABEL: soff4_voff1: -; GFX940-GISEL: ; %bb.0: ; %bb -; GFX940-GISEL-NEXT: s_load_dword s0, s[4:5], 0x24 -; GFX940-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 1 -; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 2 -; GFX940-GISEL-NEXT: s_add_u32 s0, 0, s0 -; GFX940-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 -; GFX940-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 -; GFX940-GISEL-NEXT: v_add_u32_e32 v3, 2, v0 -; GFX940-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1 -; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 2 -; GFX940-GISEL-NEXT: scratch_store_byte v3, v1, off sc0 sc1 -; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX940-GISEL-NEXT: v_add_u32_e32 v0, 4, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 -; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off sc0 sc1 -; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX940-GISEL-NEXT: s_endpgm +; GFX942-GISEL-LABEL: soff4_voff1: +; GFX942-GISEL: ; %bb.0: ; %bb +; GFX942-GISEL-NEXT: s_load_dword s0, s[4:5], 0x24 +; GFX942-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 1 +; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-GISEL-NEXT: s_lshl_b32 s0, s0, 2 +; GFX942-GISEL-NEXT: s_add_u32 s0, 0, s0 +; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 +; GFX942-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 +; GFX942-GISEL-NEXT: v_add_u32_e32 v3, 2, v0 +; GFX942-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1 +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 2 +; GFX942-GISEL-NEXT: scratch_store_byte v3, v1, off sc0 sc1 +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: v_add_u32_e32 v0, 4, v0 +; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 4 +; GFX942-GISEL-NEXT: scratch_store_byte v0, v1, off sc0 sc1 +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: s_endpgm ; ; GFX11-SDAG-LABEL: soff4_voff1: ; GFX11-SDAG: ; %bb.0: ; %bb @@ -964,49 +964,49 @@ bb: } define amdgpu_kernel void @soff4_voff2(i32 %soff) { -; GFX940-SDAG-LABEL: soff4_voff2: -; GFX940-SDAG: ; %bb.0: ; %bb -; GFX940-SDAG-NEXT: s_load_dword s0, s[4:5], 0x24 -; GFX940-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1 -; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 2 -; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-SDAG-NEXT: v_lshl_add_u32 v0, v0, 1, v2 -; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, off offset:1 sc0 sc1 -; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX940-SDAG-NEXT: v_add_u32_e32 v1, 2, v0 -; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2 -; GFX940-SDAG-NEXT: scratch_store_byte v1, v2, off sc0 sc1 -; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX940-SDAG-NEXT: v_add_u32_e32 v0, 4, v0 -; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4 -; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, off sc0 sc1 -; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX940-SDAG-NEXT: s_endpgm +; GFX942-SDAG-LABEL: soff4_voff2: +; GFX942-SDAG: ; %bb.0: ; %bb +; GFX942-SDAG-NEXT: s_load_dword s0, s[4:5], 0x24 +; GFX942-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 1 +; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-SDAG-NEXT: s_lshl_b32 s0, s0, 2 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-SDAG-NEXT: v_lshl_add_u32 v0, v0, 1, v2 +; GFX942-SDAG-NEXT: scratch_store_byte v0, v1, off offset:1 sc0 sc1 +; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX942-SDAG-NEXT: v_add_u32_e32 v1, 2, v0 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v2, 2 +; GFX942-SDAG-NEXT: scratch_store_byte v1, v2, off sc0 sc1 +; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX942-SDAG-NEXT: v_add_u32_e32 v0, 4, v0 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 4 +; GFX942-SDAG-NEXT: scratch_store_byte v0, v1, off sc0 sc1 +; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX942-SDAG-NEXT: s_endpgm ; -; GFX940-GISEL-LABEL: soff4_voff2: -; GFX940-GISEL: ; %bb.0: ; %bb -; GFX940-GISEL-NEXT: s_load_dword s0, s[4:5], 0x24 -; GFX940-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 1 -; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 2 -; GFX940-GISEL-NEXT: s_add_u32 s0, 0, s0 -; GFX940-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 -; GFX940-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 -; GFX940-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1 -; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX940-GISEL-NEXT: v_add_u32_e32 v1, 2, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 2 -; GFX940-GISEL-NEXT: scratch_store_byte v1, v2, off sc0 sc1 -; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX940-GISEL-NEXT: v_add_u32_e32 v0, 4, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 -; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off sc0 sc1 -; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX940-GISEL-NEXT: s_endpgm +; GFX942-GISEL-LABEL: soff4_voff2: +; GFX942-GISEL: ; %bb.0: ; %bb +; GFX942-GISEL-NEXT: s_load_dword s0, s[4:5], 0x24 +; GFX942-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 1 +; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-GISEL-NEXT: s_lshl_b32 s0, s0, 2 +; GFX942-GISEL-NEXT: s_add_u32 s0, 0, s0 +; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 +; GFX942-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 +; GFX942-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1 +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: v_add_u32_e32 v1, 2, v0 +; GFX942-GISEL-NEXT: v_mov_b32_e32 v2, 2 +; GFX942-GISEL-NEXT: scratch_store_byte v1, v2, off sc0 sc1 +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: v_add_u32_e32 v0, 4, v0 +; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 4 +; GFX942-GISEL-NEXT: scratch_store_byte v0, v1, off sc0 sc1 +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: s_endpgm ; ; GFX11-SDAG-LABEL: soff4_voff2: ; GFX11-SDAG: ; %bb.0: ; %bb @@ -1105,48 +1105,48 @@ bb: } define amdgpu_kernel void @soff4_voff4(i32 %soff) { -; GFX940-SDAG-LABEL: soff4_voff4: -; GFX940-SDAG: ; %bb.0: ; %bb -; GFX940-SDAG-NEXT: s_load_dword s0, s[4:5], 0x24 -; GFX940-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1 -; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2 -; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 2 -; GFX940-SDAG-NEXT: v_mov_b32_e32 v3, s0 -; GFX940-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, v3 -; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, off offset:1 sc0 sc1 -; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX940-SDAG-NEXT: scratch_store_byte v0, v2, off offset:2 sc0 sc1 -; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX940-SDAG-NEXT: v_add_u32_e32 v0, 4, v0 -; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 4 -; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, off sc0 sc1 -; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX940-SDAG-NEXT: s_endpgm +; GFX942-SDAG-LABEL: soff4_voff4: +; GFX942-SDAG: ; %bb.0: ; %bb +; GFX942-SDAG-NEXT: s_load_dword s0, s[4:5], 0x24 +; GFX942-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 1 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v2, 2 +; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-SDAG-NEXT: s_lshl_b32 s0, s0, 2 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, s0 +; GFX942-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, v3 +; GFX942-SDAG-NEXT: scratch_store_byte v0, v1, off offset:1 sc0 sc1 +; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX942-SDAG-NEXT: scratch_store_byte v0, v2, off offset:2 sc0 sc1 +; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX942-SDAG-NEXT: v_add_u32_e32 v0, 4, v0 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 4 +; GFX942-SDAG-NEXT: scratch_store_byte v0, v1, off sc0 sc1 +; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX942-SDAG-NEXT: s_endpgm ; -; GFX940-GISEL-LABEL: soff4_voff4: -; GFX940-GISEL: ; %bb.0: ; %bb -; GFX940-GISEL-NEXT: s_load_dword s0, s[4:5], 0x24 -; GFX940-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 1 -; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 2 -; GFX940-GISEL-NEXT: s_add_u32 s0, 0, s0 -; GFX940-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 -; GFX940-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 -; GFX940-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1 -; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX940-GISEL-NEXT: v_add_u32_e32 v1, 2, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 2 -; GFX940-GISEL-NEXT: scratch_store_byte v1, v2, off sc0 sc1 -; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX940-GISEL-NEXT: v_add_u32_e32 v0, 4, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4 -; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off sc0 sc1 -; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX940-GISEL-NEXT: s_endpgm +; GFX942-GISEL-LABEL: soff4_voff4: +; GFX942-GISEL: ; %bb.0: ; %bb +; GFX942-GISEL-NEXT: s_load_dword s0, s[4:5], 0x24 +; GFX942-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 1 +; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-GISEL-NEXT: s_lshl_b32 s0, s0, 2 +; GFX942-GISEL-NEXT: s_add_u32 s0, 0, s0 +; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 +; GFX942-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 +; GFX942-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1 +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: v_add_u32_e32 v1, 2, v0 +; GFX942-GISEL-NEXT: v_mov_b32_e32 v2, 2 +; GFX942-GISEL-NEXT: scratch_store_byte v1, v2, off sc0 sc1 +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: v_add_u32_e32 v0, 4, v0 +; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 4 +; GFX942-GISEL-NEXT: scratch_store_byte v0, v1, off sc0 sc1 +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: s_endpgm ; ; GFX11-SDAG-LABEL: soff4_voff4: ; GFX11-SDAG: ; %bb.0: ; %bb @@ -1245,29 +1245,29 @@ bb: } define amdgpu_kernel void @soff1_voff1_negative(i32 %soff) { -; GFX940-SDAG-LABEL: soff1_voff1_negative: -; GFX940-SDAG: ; %bb.0: ; %bb -; GFX940-SDAG-NEXT: s_load_dword s0, s[4:5], 0x24 -; GFX940-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1 -; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-SDAG-NEXT: v_add_u32_e32 v0, s0, v0 -; GFX940-SDAG-NEXT: v_add_u32_e32 v0, -1, v0 -; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, off sc0 sc1 -; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX940-SDAG-NEXT: s_endpgm +; GFX942-SDAG-LABEL: soff1_voff1_negative: +; GFX942-SDAG: ; %bb.0: ; %bb +; GFX942-SDAG-NEXT: s_load_dword s0, s[4:5], 0x24 +; GFX942-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 1 +; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-SDAG-NEXT: v_add_u32_e32 v0, s0, v0 +; GFX942-SDAG-NEXT: v_add_u32_e32 v0, -1, v0 +; GFX942-SDAG-NEXT: scratch_store_byte v0, v1, off sc0 sc1 +; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX942-SDAG-NEXT: s_endpgm ; -; GFX940-GISEL-LABEL: soff1_voff1_negative: -; GFX940-GISEL: ; %bb.0: ; %bb -; GFX940-GISEL-NEXT: s_load_dword s0, s[4:5], 0x24 -; GFX940-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 1 -; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-GISEL-NEXT: s_add_u32 s0, 0, s0 -; GFX940-GISEL-NEXT: v_add3_u32 v0, s0, v0, -1 -; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off sc0 sc1 -; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX940-GISEL-NEXT: s_endpgm +; GFX942-GISEL-LABEL: soff1_voff1_negative: +; GFX942-GISEL: ; %bb.0: ; %bb +; GFX942-GISEL-NEXT: s_load_dword s0, s[4:5], 0x24 +; GFX942-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 1 +; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-GISEL-NEXT: s_add_u32 s0, 0, s0 +; GFX942-GISEL-NEXT: v_add3_u32 v0, s0, v0, -1 +; GFX942-GISEL-NEXT: scratch_store_byte v0, v1, off sc0 sc1 +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: s_endpgm ; ; GFX11-SDAG-LABEL: soff1_voff1_negative: ; GFX11-SDAG: ; %bb.0: ; %bb diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll index 5415af02ef89c..c39a03ee8008c 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll @@ -4,7 +4,7 @@ ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-promote-alloca -mattr=+enable-flat-scratch < %s | FileCheck --check-prefix=GFX11 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-promote-alloca -mattr=+enable-flat-scratch < %s | FileCheck --check-prefix=GFX12 %s ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -mattr=-promote-alloca -mattr=+enable-flat-scratch < %s | FileCheck --check-prefix=GFX9-PAL %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -mattr=-promote-alloca < %s | FileCheck -check-prefixes=GFX940 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -mattr=-promote-alloca < %s | FileCheck -check-prefixes=GFX942 %s ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -mattr=-promote-alloca -mattr=+enable-flat-scratch < %s | FileCheck --check-prefixes=GFX10-PAL,GFX1010-PAL %s ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1030 -mattr=-promote-alloca -mattr=+enable-flat-scratch < %s | FileCheck --check-prefixes=GFX10-PAL,GFX1030-PAL %s ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -mattr=-promote-alloca -mattr=+enable-flat-scratch < %s | FileCheck --check-prefixes=GFX11-PAL %s @@ -104,19 +104,19 @@ define amdgpu_kernel void @zero_init_kernel() { ; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 ; GFX9-PAL-NEXT: s_endpgm ; -; GFX940-LABEL: zero_init_kernel: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_mov_b32 s0, 0 -; GFX940-NEXT: s_mov_b32 s1, s0 -; GFX940-NEXT: s_mov_b32 s2, s0 -; GFX940-NEXT: s_mov_b32 s3, s0 -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:48 sc0 sc1 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:32 sc0 sc1 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:16 sc0 sc1 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], off sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: zero_init_kernel: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_mov_b32 s0, 0 +; GFX942-NEXT: s_mov_b32 s1, s0 +; GFX942-NEXT: s_mov_b32 s2, s0 +; GFX942-NEXT: s_mov_b32 s3, s0 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[2:3] +; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:48 +; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:32 +; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:16 +; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], off +; GFX942-NEXT: s_endpgm ; ; GFX1010-PAL-LABEL: zero_init_kernel: ; GFX1010-PAL: ; %bb.0: @@ -297,21 +297,21 @@ define void @zero_init_foo() { ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: zero_init_foo: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s0, 0 -; GFX940-NEXT: s_mov_b32 s1, s0 -; GFX940-NEXT: s_mov_b32 s2, s0 -; GFX940-NEXT: s_mov_b32 s3, s0 -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:48 sc0 sc1 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:32 sc0 sc1 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:16 sc0 sc1 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s32 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: zero_init_foo: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s0, 0 +; GFX942-NEXT: s_mov_b32 s1, s0 +; GFX942-NEXT: s_mov_b32 s2, s0 +; GFX942-NEXT: s_mov_b32 s3, s0 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[2:3] +; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:48 +; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:32 +; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:16 +; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], s32 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-PAL-LABEL: zero_init_foo: ; GFX10-PAL: ; %bb.0: @@ -456,19 +456,19 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) { ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_endpgm ; -; GFX940-LABEL: store_load_sindex_kernel: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: s_load_dword s0, s[4:5], 0x24 -; GFX940-NEXT: v_mov_b32_e32 v0, 15 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_lshl_b32 s1, s0, 2 -; GFX940-NEXT: s_and_b32 s0, s0, 15 -; GFX940-NEXT: s_lshl_b32 s0, s0, 2 -; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: store_load_sindex_kernel: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: s_load_dword s0, s[4:5], 0x24 +; GFX942-NEXT: v_mov_b32_e32 v0, 15 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_lshl_b32 s1, s0, 2 +; GFX942-NEXT: s_and_b32 s0, s0, 15 +; GFX942-NEXT: s_lshl_b32 s0, s0, 2 +; GFX942-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_endpgm ; ; GFX10-PAL-LABEL: store_load_sindex_kernel: ; GFX10-PAL: ; %bb.0: ; %bb @@ -604,17 +604,17 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) { ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_endpgm ; -; GFX940-LABEL: store_load_sindex_foo: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: s_lshl_b32 s1, s0, 2 -; GFX940-NEXT: v_mov_b32_e32 v0, 15 -; GFX940-NEXT: s_and_b32 s0, s0, 15 -; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_lshl_b32 s0, s0, 2 -; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: store_load_sindex_foo: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: s_lshl_b32 s1, s0, 2 +; GFX942-NEXT: v_mov_b32_e32 v0, 15 +; GFX942-NEXT: s_and_b32 s0, s0, 15 +; GFX942-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_lshl_b32 s0, s0, 2 +; GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_endpgm ; ; GFX10-PAL-LABEL: store_load_sindex_foo: ; GFX10-PAL: ; %bb.0: ; %bb @@ -744,17 +744,17 @@ define amdgpu_kernel void @store_load_vindex_kernel() { ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_endpgm ; -; GFX940-LABEL: store_load_vindex_kernel: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX940-NEXT: v_and_b32_e32 v0, 0xffc, v0 -; GFX940-NEXT: v_mov_b32_e32 v1, 15 -; GFX940-NEXT: scratch_store_dword v0, v1, off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_sub_u32_e32 v0, 0, v0 -; GFX940-NEXT: scratch_load_dword v0, v0, off offset:124 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: store_load_vindex_kernel: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX942-NEXT: v_and_b32_e32 v0, 0xffc, v0 +; GFX942-NEXT: v_mov_b32_e32 v1, 15 +; GFX942-NEXT: scratch_store_dword v0, v1, off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_sub_u32_e32 v0, 0, v0 +; GFX942-NEXT: scratch_load_dword v0, v0, off offset:124 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_endpgm ; ; GFX10-PAL-LABEL: store_load_vindex_kernel: ; GFX10-PAL: ; %bb.0: ; %bb @@ -885,19 +885,19 @@ define void @store_load_vindex_foo(i32 %idx) { ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: store_load_vindex_foo: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, s32 -; GFX940-NEXT: v_lshl_add_u32 v1, v0, 2, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, 15 -; GFX940-NEXT: v_and_b32_e32 v0, 15, v0 -; GFX940-NEXT: scratch_store_dword v1, v2, off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX940-NEXT: scratch_load_dword v0, v0, s32 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: store_load_vindex_foo: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, s32 +; GFX942-NEXT: v_lshl_add_u32 v1, v0, 2, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, 15 +; GFX942-NEXT: v_and_b32_e32 v0, 15, v0 +; GFX942-NEXT: scratch_store_dword v1, v2, off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX942-NEXT: scratch_load_dword v0, v0, s32 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-PAL-LABEL: store_load_vindex_foo: ; GFX10-PAL: ; %bb.0: ; %bb @@ -994,13 +994,13 @@ define void @private_ptr_foo(ptr addrspace(5) nocapture %arg) { ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: private_ptr_foo: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, 0x41200000 -; GFX940-NEXT: scratch_store_dword v0, v1, off offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: private_ptr_foo: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, 0x41200000 +; GFX942-NEXT: scratch_store_dword v0, v1, off offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-PAL-LABEL: private_ptr_foo: ; GFX10-PAL: ; %bb.0: @@ -1135,21 +1135,21 @@ define amdgpu_kernel void @zero_init_small_offset_kernel() { ; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:304 ; GFX9-PAL-NEXT: s_endpgm ; -; GFX940-LABEL: zero_init_small_offset_kernel: -; GFX940: ; %bb.0: -; GFX940-NEXT: scratch_load_dword v0, off, off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_mov_b32 s0, 0 -; GFX940-NEXT: s_mov_b32 s1, s0 -; GFX940-NEXT: s_mov_b32 s2, s0 -; GFX940-NEXT: s_mov_b32 s3, s0 -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:256 sc0 sc1 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:272 sc0 sc1 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:288 sc0 sc1 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:304 sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: zero_init_small_offset_kernel: +; GFX942: ; %bb.0: +; GFX942-NEXT: scratch_load_dword v0, off, off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_mov_b32 s0, 0 +; GFX942-NEXT: s_mov_b32 s1, s0 +; GFX942-NEXT: s_mov_b32 s2, s0 +; GFX942-NEXT: s_mov_b32 s3, s0 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[2:3] +; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:256 +; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:272 +; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:288 +; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:304 +; GFX942-NEXT: s_endpgm ; ; GFX1010-PAL-LABEL: zero_init_small_offset_kernel: ; GFX1010-PAL: ; %bb.0: @@ -1351,23 +1351,23 @@ define void @zero_init_small_offset_foo() { ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: zero_init_small_offset_foo: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: scratch_load_dword v0, off, s32 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_mov_b32 s0, 0 -; GFX940-NEXT: s_mov_b32 s1, s0 -; GFX940-NEXT: s_mov_b32 s2, s0 -; GFX940-NEXT: s_mov_b32 s3, s0 -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:256 sc0 sc1 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:272 sc0 sc1 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:288 sc0 sc1 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:304 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: zero_init_small_offset_foo: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: scratch_load_dword v0, off, s32 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_mov_b32 s0, 0 +; GFX942-NEXT: s_mov_b32 s1, s0 +; GFX942-NEXT: s_mov_b32 s2, s0 +; GFX942-NEXT: s_mov_b32 s3, s0 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[2:3] +; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:256 +; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:272 +; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:288 +; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:304 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-PAL-LABEL: zero_init_small_offset_foo: ; GFX10-PAL: ; %bb.0: @@ -1542,23 +1542,23 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) { ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_endpgm ; -; GFX940-LABEL: store_load_sindex_small_offset_kernel: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: s_load_dword s0, s[4:5], 0x24 -; GFX940-NEXT: scratch_load_dword v0, off, off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 15 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_lshl_b32 s1, s0, 2 -; GFX940-NEXT: s_and_b32 s0, s0, 15 -; GFX940-NEXT: s_addk_i32 s1, 0x100 -; GFX940-NEXT: s_lshl_b32 s0, s0, 2 -; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_addk_i32 s0, 0x100 -; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: store_load_sindex_small_offset_kernel: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: s_load_dword s0, s[4:5], 0x24 +; GFX942-NEXT: scratch_load_dword v0, off, off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 15 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_lshl_b32 s1, s0, 2 +; GFX942-NEXT: s_and_b32 s0, s0, 15 +; GFX942-NEXT: s_addk_i32 s1, 0x100 +; GFX942-NEXT: s_lshl_b32 s0, s0, 2 +; GFX942-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_addk_i32 s0, 0x100 +; GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_endpgm ; ; GFX1010-PAL-LABEL: store_load_sindex_small_offset_kernel: ; GFX1010-PAL: ; %bb.0: ; %bb @@ -1759,21 +1759,21 @@ define amdgpu_ps void @store_load_sindex_small_offset_foo(i32 inreg %idx) { ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_endpgm ; -; GFX940-LABEL: store_load_sindex_small_offset_foo: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: scratch_load_dword v0, off, off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_lshl_b32 s1, s0, 2 -; GFX940-NEXT: s_and_b32 s0, s0, 15 -; GFX940-NEXT: s_addk_i32 s1, 0x100 -; GFX940-NEXT: v_mov_b32_e32 v0, 15 -; GFX940-NEXT: s_lshl_b32 s0, s0, 2 -; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_addk_i32 s0, 0x100 -; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: store_load_sindex_small_offset_foo: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: scratch_load_dword v0, off, off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_lshl_b32 s1, s0, 2 +; GFX942-NEXT: s_and_b32 s0, s0, 15 +; GFX942-NEXT: s_addk_i32 s1, 0x100 +; GFX942-NEXT: v_mov_b32_e32 v0, 15 +; GFX942-NEXT: s_lshl_b32 s0, s0, 2 +; GFX942-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_addk_i32 s0, 0x100 +; GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_endpgm ; ; GFX1010-PAL-LABEL: store_load_sindex_small_offset_foo: ; GFX1010-PAL: ; %bb.0: ; %bb @@ -1956,19 +1956,19 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel() { ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_endpgm ; -; GFX940-LABEL: store_load_vindex_small_offset_kernel: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: scratch_load_dword v1, off, off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX940-NEXT: v_and_b32_e32 v0, 0xffc, v0 -; GFX940-NEXT: v_mov_b32_e32 v1, 15 -; GFX940-NEXT: scratch_store_dword v0, v1, off offset:256 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_sub_u32_e32 v0, 0x100, v0 -; GFX940-NEXT: scratch_load_dword v0, v0, off offset:124 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: store_load_vindex_small_offset_kernel: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: scratch_load_dword v1, off, off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX942-NEXT: v_and_b32_e32 v0, 0xffc, v0 +; GFX942-NEXT: v_mov_b32_e32 v1, 15 +; GFX942-NEXT: scratch_store_dword v0, v1, off offset:256 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_sub_u32_e32 v0, 0x100, v0 +; GFX942-NEXT: scratch_load_dword v0, v0, off offset:124 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_endpgm ; ; GFX1010-PAL-LABEL: store_load_vindex_small_offset_kernel: ; GFX1010-PAL: ; %bb.0: ; %bb @@ -2145,22 +2145,22 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) { ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: store_load_vindex_small_offset_foo: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: scratch_load_dword v1, off, s32 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_add_i32 s0, s32, 0x100 -; GFX940-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NEXT: v_lshl_add_u32 v1, v0, 2, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, 15 -; GFX940-NEXT: v_and_b32_e32 v0, 15, v0 -; GFX940-NEXT: scratch_store_dword v1, v2, off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX940-NEXT: scratch_load_dword v0, v0, s32 offset:256 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: store_load_vindex_small_offset_foo: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: scratch_load_dword v1, off, s32 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_add_i32 s0, s32, 0x100 +; GFX942-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NEXT: v_lshl_add_u32 v1, v0, 2, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, 15 +; GFX942-NEXT: v_and_b32_e32 v0, 15, v0 +; GFX942-NEXT: scratch_store_dword v1, v2, off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX942-NEXT: scratch_load_dword v0, v0, s32 offset:256 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-PAL-LABEL: store_load_vindex_small_offset_foo: ; GFX10-PAL: ; %bb.0: ; %bb @@ -2333,22 +2333,22 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() { ; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:48 ; GFX9-PAL-NEXT: s_endpgm ; -; GFX940-LABEL: zero_init_large_offset_kernel: -; GFX940: ; %bb.0: -; GFX940-NEXT: scratch_load_dword v0, off, off offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_mov_b32 s0, 0 -; GFX940-NEXT: s_mov_b32 s1, s0 -; GFX940-NEXT: s_mov_b32 s2, s0 -; GFX940-NEXT: s_mov_b32 s3, s0 -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-NEXT: s_movk_i32 s0, 0x4004 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s0 sc0 sc1 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:16 sc0 sc1 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:32 sc0 sc1 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:48 sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: zero_init_large_offset_kernel: +; GFX942: ; %bb.0: +; GFX942-NEXT: scratch_load_dword v0, off, off offset:4 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_mov_b32 s0, 0 +; GFX942-NEXT: s_mov_b32 s1, s0 +; GFX942-NEXT: s_mov_b32 s2, s0 +; GFX942-NEXT: s_mov_b32 s3, s0 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[2:3] +; GFX942-NEXT: s_movk_i32 s0, 0x4004 +; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], s0 +; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:16 +; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:32 +; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:48 +; GFX942-NEXT: s_endpgm ; ; GFX1010-PAL-LABEL: zero_init_large_offset_kernel: ; GFX1010-PAL: ; %bb.0: @@ -2568,27 +2568,27 @@ define void @zero_init_large_offset_foo() { ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: zero_init_large_offset_foo: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: scratch_load_dword v0, off, s32 offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_mov_b32 s0, 0 -; GFX940-NEXT: s_mov_b32 s1, s0 -; GFX940-NEXT: s_mov_b32 s2, s0 -; GFX940-NEXT: s_mov_b32 s3, s0 -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s0 sc0 sc1 -; GFX940-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:16 sc0 sc1 -; GFX940-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:32 sc0 sc1 -; GFX940-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:48 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: zero_init_large_offset_foo: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: scratch_load_dword v0, off, s32 offset:4 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_mov_b32 s0, 0 +; GFX942-NEXT: s_mov_b32 s1, s0 +; GFX942-NEXT: s_mov_b32 s2, s0 +; GFX942-NEXT: s_mov_b32 s3, s0 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[2:3] +; GFX942-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], s0 +; GFX942-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:16 +; GFX942-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:32 +; GFX942-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:48 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-PAL-LABEL: zero_init_large_offset_foo: ; GFX1010-PAL: ; %bb.0: @@ -2796,23 +2796,23 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) { ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_endpgm ; -; GFX940-LABEL: store_load_sindex_large_offset_kernel: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: s_load_dword s0, s[4:5], 0x24 -; GFX940-NEXT: scratch_load_dword v0, off, off offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 15 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_lshl_b32 s1, s0, 2 -; GFX940-NEXT: s_and_b32 s0, s0, 15 -; GFX940-NEXT: s_addk_i32 s1, 0x4004 -; GFX940-NEXT: s_lshl_b32 s0, s0, 2 -; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_addk_i32 s0, 0x4004 -; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: store_load_sindex_large_offset_kernel: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: s_load_dword s0, s[4:5], 0x24 +; GFX942-NEXT: scratch_load_dword v0, off, off offset:4 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 15 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_lshl_b32 s1, s0, 2 +; GFX942-NEXT: s_and_b32 s0, s0, 15 +; GFX942-NEXT: s_addk_i32 s1, 0x4004 +; GFX942-NEXT: s_lshl_b32 s0, s0, 2 +; GFX942-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_addk_i32 s0, 0x4004 +; GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_endpgm ; ; GFX1010-PAL-LABEL: store_load_sindex_large_offset_kernel: ; GFX1010-PAL: ; %bb.0: ; %bb @@ -3013,21 +3013,21 @@ define amdgpu_ps void @store_load_sindex_large_offset_foo(i32 inreg %idx) { ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_endpgm ; -; GFX940-LABEL: store_load_sindex_large_offset_foo: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: scratch_load_dword v0, off, off offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_lshl_b32 s1, s0, 2 -; GFX940-NEXT: s_and_b32 s0, s0, 15 -; GFX940-NEXT: s_addk_i32 s1, 0x4004 -; GFX940-NEXT: v_mov_b32_e32 v0, 15 -; GFX940-NEXT: s_lshl_b32 s0, s0, 2 -; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_addk_i32 s0, 0x4004 -; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: store_load_sindex_large_offset_foo: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: scratch_load_dword v0, off, off offset:4 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_lshl_b32 s1, s0, 2 +; GFX942-NEXT: s_and_b32 s0, s0, 15 +; GFX942-NEXT: s_addk_i32 s1, 0x4004 +; GFX942-NEXT: v_mov_b32_e32 v0, 15 +; GFX942-NEXT: s_lshl_b32 s0, s0, 2 +; GFX942-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_addk_i32 s0, 0x4004 +; GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_endpgm ; ; GFX1010-PAL-LABEL: store_load_sindex_large_offset_foo: ; GFX1010-PAL: ; %bb.0: ; %bb @@ -3211,20 +3211,20 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() { ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_endpgm ; -; GFX940-LABEL: store_load_vindex_large_offset_kernel: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: scratch_load_dword v1, off, off offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX940-NEXT: v_and_b32_e32 v0, 0xffc, v0 -; GFX940-NEXT: v_mov_b32_e32 v1, 15 -; GFX940-NEXT: s_movk_i32 s0, 0x4004 -; GFX940-NEXT: scratch_store_dword v0, v1, s0 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_sub_u32_e32 v0, 0x4004, v0 -; GFX940-NEXT: scratch_load_dword v0, v0, off offset:124 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: store_load_vindex_large_offset_kernel: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: scratch_load_dword v1, off, off offset:4 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX942-NEXT: v_and_b32_e32 v0, 0xffc, v0 +; GFX942-NEXT: v_mov_b32_e32 v1, 15 +; GFX942-NEXT: s_movk_i32 s0, 0x4004 +; GFX942-NEXT: scratch_store_dword v0, v1, s0 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_sub_u32_e32 v0, 0x4004, v0 +; GFX942-NEXT: scratch_load_dword v0, v0, off offset:124 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_endpgm ; ; GFX1010-PAL-LABEL: store_load_vindex_large_offset_kernel: ; GFX1010-PAL: ; %bb.0: ; %bb @@ -3403,23 +3403,23 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) { ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: store_load_vindex_large_offset_foo: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: scratch_load_dword v1, off, s32 offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX940-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NEXT: v_lshl_add_u32 v1, v0, 2, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, 15 -; GFX940-NEXT: v_and_b32_e32 v0, 15, v0 -; GFX940-NEXT: scratch_store_dword v1, v2, off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX940-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX940-NEXT: scratch_load_dword v0, v0, s0 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: store_load_vindex_large_offset_foo: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: scratch_load_dword v1, off, s32 offset:4 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX942-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NEXT: v_lshl_add_u32 v1, v0, 2, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, 15 +; GFX942-NEXT: v_and_b32_e32 v0, 15, v0 +; GFX942-NEXT: scratch_store_dword v1, v2, off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX942-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX942-NEXT: scratch_load_dword v0, v0, s0 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-PAL-LABEL: store_load_vindex_large_offset_foo: ; GFX10-PAL: ; %bb.0: ; %bb @@ -3566,18 +3566,18 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() { ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_endpgm ; -; GFX940-LABEL: store_load_large_imm_offset_kernel: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: v_mov_b32_e32 v0, 13 -; GFX940-NEXT: scratch_store_dword off, v0, off offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0x3000 -; GFX940-NEXT: v_mov_b32_e32 v1, 15 -; GFX940-NEXT: scratch_store_dword v0, v1, off offset:3716 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: scratch_load_dword v0, v0, off offset:3716 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: store_load_large_imm_offset_kernel: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: v_mov_b32_e32 v0, 13 +; GFX942-NEXT: scratch_store_dword off, v0, off offset:4 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0x3000 +; GFX942-NEXT: v_mov_b32_e32 v1, 15 +; GFX942-NEXT: scratch_store_dword v0, v1, off offset:3716 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: scratch_load_dword v0, v0, off offset:3716 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_endpgm ; ; GFX1010-PAL-LABEL: store_load_large_imm_offset_kernel: ; GFX1010-PAL: ; %bb.0: ; %bb @@ -3738,19 +3738,19 @@ define void @store_load_large_imm_offset_foo() { ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: store_load_large_imm_offset_foo: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 13 -; GFX940-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0x3000 -; GFX940-NEXT: v_mov_b32_e32 v1, 15 -; GFX940-NEXT: scratch_store_dword v0, v1, s32 offset:3716 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: scratch_load_dword v0, v0, s32 offset:3716 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: store_load_large_imm_offset_foo: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 13 +; GFX942-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0x3000 +; GFX942-NEXT: v_mov_b32_e32 v1, 15 +; GFX942-NEXT: scratch_store_dword v0, v1, s32 offset:3716 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: scratch_load_dword v0, v0, s32 offset:3716 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-PAL-LABEL: store_load_large_imm_offset_foo: ; GFX10-PAL: ; %bb.0: ; %bb @@ -3889,20 +3889,20 @@ define amdgpu_kernel void @store_load_vidx_sidx_offset(i32 %sidx) { ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_endpgm ; -; GFX940-LABEL: store_load_vidx_sidx_offset: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: s_load_dword s0, s[4:5], 0x24 -; GFX940-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_add_u32_e32 v0, s0, v0 -; GFX940-NEXT: v_lshl_add_u32 v0, v0, 2, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, 15 -; GFX940-NEXT: scratch_store_dword v0, v1, off offset:1024 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: scratch_load_dword v0, v0, off offset:1024 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: store_load_vidx_sidx_offset: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: s_load_dword s0, s[4:5], 0x24 +; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_add_u32_e32 v0, s0, v0 +; GFX942-NEXT: v_lshl_add_u32 v0, v0, 2, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, 15 +; GFX942-NEXT: scratch_store_dword v0, v1, off offset:1024 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: scratch_load_dword v0, v0, off offset:1024 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_endpgm ; ; GFX10-PAL-LABEL: store_load_vidx_sidx_offset: ; GFX10-PAL: ; %bb.0: ; %bb @@ -4022,16 +4022,16 @@ define void @store_load_i64_aligned(ptr addrspace(5) nocapture %arg) { ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: store_load_i64_aligned: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 15 -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: scratch_store_dwordx2 v0, v[2:3], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: scratch_load_dwordx2 v[0:1], v0, off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: store_load_i64_aligned: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 15 +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: scratch_store_dwordx2 v0, v[2:3], off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: scratch_load_dwordx2 v[0:1], v0, off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-PAL-LABEL: store_load_i64_aligned: ; GFX10-PAL: ; %bb.0: ; %bb @@ -4133,16 +4133,16 @@ define void @store_load_i64_unaligned(ptr addrspace(5) nocapture %arg) { ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: store_load_i64_unaligned: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 15 -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: scratch_store_dwordx2 v0, v[2:3], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: scratch_load_dwordx2 v[0:1], v0, off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: store_load_i64_unaligned: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 15 +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: scratch_store_dwordx2 v0, v[2:3], off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: scratch_load_dwordx2 v[0:1], v0, off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-PAL-LABEL: store_load_i64_unaligned: ; GFX10-PAL: ; %bb.0: ; %bb @@ -4249,17 +4249,17 @@ define void @store_load_v3i32_unaligned(ptr addrspace(5) nocapture %arg) { ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: store_load_v3i32_unaligned: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 1 -; GFX940-NEXT: v_mov_b32_e32 v3, 2 -; GFX940-NEXT: v_mov_b32_e32 v4, 3 -; GFX940-NEXT: scratch_store_dwordx3 v0, v[2:4], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: scratch_load_dwordx3 v[0:2], v0, off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: store_load_v3i32_unaligned: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 1 +; GFX942-NEXT: v_mov_b32_e32 v3, 2 +; GFX942-NEXT: v_mov_b32_e32 v4, 3 +; GFX942-NEXT: scratch_store_dwordx3 v0, v[2:4], off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: scratch_load_dwordx3 v[0:2], v0, off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-PAL-LABEL: store_load_v3i32_unaligned: ; GFX10-PAL: ; %bb.0: ; %bb @@ -4372,18 +4372,18 @@ define void @store_load_v4i32_unaligned(ptr addrspace(5) nocapture %arg) { ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: store_load_v4i32_unaligned: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 1 -; GFX940-NEXT: v_mov_b32_e32 v3, 2 -; GFX940-NEXT: v_mov_b32_e32 v4, 3 -; GFX940-NEXT: v_mov_b32_e32 v5, 4 -; GFX940-NEXT: scratch_store_dwordx4 v0, v[2:5], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: scratch_load_dwordx4 v[0:3], v0, off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: store_load_v4i32_unaligned: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 1 +; GFX942-NEXT: v_mov_b32_e32 v3, 2 +; GFX942-NEXT: v_mov_b32_e32 v4, 3 +; GFX942-NEXT: v_mov_b32_e32 v5, 4 +; GFX942-NEXT: scratch_store_dwordx4 v0, v[2:5], off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: scratch_load_dwordx4 v[0:3], v0, off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-PAL-LABEL: store_load_v4i32_unaligned: ; GFX10-PAL: ; %bb.0: ; %bb @@ -4488,16 +4488,16 @@ define void @store_load_i32_negative_unaligned(ptr addrspace(5) nocapture %arg) ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: store_load_i32_negative_unaligned: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_u32_e32 v0, -1, v0 -; GFX940-NEXT: v_mov_b32_e32 v1, 1 -; GFX940-NEXT: scratch_store_byte v0, v1, off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: scratch_load_ubyte v0, v0, off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: store_load_i32_negative_unaligned: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_u32_e32 v0, -1, v0 +; GFX942-NEXT: v_mov_b32_e32 v1, 1 +; GFX942-NEXT: scratch_store_byte v0, v1, off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: scratch_load_ubyte v0, v0, off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-PAL-LABEL: store_load_i32_negative_unaligned: ; GFX1010-PAL: ; %bb.0: ; %bb @@ -4610,16 +4610,16 @@ define void @store_load_i32_large_negative_unaligned(ptr addrspace(5) nocapture ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: store_load_i32_large_negative_unaligned: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_u32_e32 v0, 0xffffef7f, v0 -; GFX940-NEXT: v_mov_b32_e32 v1, 1 -; GFX940-NEXT: scratch_store_byte v0, v1, off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: scratch_load_ubyte v0, v0, off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: store_load_i32_large_negative_unaligned: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_u32_e32 v0, 0xffffef7f, v0 +; GFX942-NEXT: v_mov_b32_e32 v1, 1 +; GFX942-NEXT: scratch_store_byte v0, v1, off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: scratch_load_ubyte v0, v0, off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX1010-PAL-LABEL: store_load_i32_large_negative_unaligned: ; GFX1010-PAL: ; %bb.0: ; %bb @@ -4792,25 +4792,25 @@ define amdgpu_ps void @large_offset() { ; GFX9-PAL-NEXT: ;;#ASMEND ; GFX9-PAL-NEXT: s_endpgm ; -; GFX940-LABEL: large_offset: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:3024 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:3024 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_mov_b32 s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_movk_i32 s0, 0x810 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: large_offset: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:3024 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:3024 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_mov_b32 s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_movk_i32 s0, 0x810 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_endpgm ; ; GFX1010-PAL-LABEL: large_offset: ; GFX1010-PAL: ; %bb.0: ; %bb @@ -4977,13 +4977,13 @@ define amdgpu_gs void @sgpr_base_large_offset(ptr addrspace(1) %out, ptr addrspa ; GFX9-PAL-NEXT: global_store_dword v[0:1], v2, off ; GFX9-PAL-NEXT: s_endpgm ; -; GFX940-LABEL: sgpr_base_large_offset: -; GFX940: ; %bb.0: ; %entry -; GFX940-NEXT: s_add_i32 s0, s0, 0xffe8 -; GFX940-NEXT: scratch_load_dword v2, off, s0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dword v[0:1], v2, off sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: sgpr_base_large_offset: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_add_i32 s0, s0, 0xffe8 +; GFX942-NEXT: scratch_load_dword v2, off, s0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: global_store_dword v[0:1], v2, off +; GFX942-NEXT: s_endpgm ; ; GFX10-PAL-LABEL: sgpr_base_large_offset: ; GFX10-PAL: ; %bb.0: ; %entry @@ -5082,14 +5082,14 @@ define amdgpu_gs void @sgpr_base_large_offset_split(ptr addrspace(1) %out, ptr a ; GFX9-PAL-NEXT: global_store_dword v[0:1], v2, off ; GFX9-PAL-NEXT: s_endpgm ; -; GFX940-LABEL: sgpr_base_large_offset_split: -; GFX940: ; %bb.0: ; %entry -; GFX940-NEXT: s_and_b32 s0, s0, -4 -; GFX940-NEXT: v_mov_b32_e32 v2, 0x100f000 -; GFX940-NEXT: scratch_load_dword v2, v2, s0 offset:4072 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dword v[0:1], v2, off sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: sgpr_base_large_offset_split: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_and_b32 s0, s0, -4 +; GFX942-NEXT: v_mov_b32_e32 v2, 0x100f000 +; GFX942-NEXT: scratch_load_dword v2, v2, s0 offset:4072 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: global_store_dword v[0:1], v2, off +; GFX942-NEXT: s_endpgm ; ; GFX10-PAL-LABEL: sgpr_base_large_offset_split: ; GFX10-PAL: ; %bb.0: ; %entry @@ -5197,15 +5197,15 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset(ptr a ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_endpgm ; -; GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: s_add_i32 s0, s0, s1 -; GFX940-NEXT: v_add_u32_e32 v0, s0, v0 -; GFX940-NEXT: v_add_u32_e32 v0, 0xffe8, v0 -; GFX940-NEXT: v_mov_b32_e32 v1, 15 -; GFX940-NEXT: scratch_store_dword v0, v1, off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: s_add_i32 s0, s0, s1 +; GFX942-NEXT: v_add_u32_e32 v0, s0, v0 +; GFX942-NEXT: v_add_u32_e32 v0, 0xffe8, v0 +; GFX942-NEXT: v_mov_b32_e32 v1, 15 +; GFX942-NEXT: scratch_store_dword v0, v1, off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_endpgm ; ; GFX10-PAL-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset: ; GFX10-PAL: ; %bb.0: ; %bb @@ -5307,15 +5307,15 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(pt ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_endpgm ; -; GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset: -; GFX940: ; %bb.0: ; %bb -; GFX940-NEXT: s_add_i32 s0, s0, s1 -; GFX940-NEXT: v_add_u32_e32 v0, s0, v0 -; GFX940-NEXT: v_add_u32_e32 v0, -16, v0 -; GFX940-NEXT: v_mov_b32_e32 v1, 15 -; GFX940-NEXT: scratch_store_dword v0, v1, off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset: +; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: s_add_i32 s0, s0, s1 +; GFX942-NEXT: v_add_u32_e32 v0, s0, v0 +; GFX942-NEXT: v_add_u32_e32 v0, -16, v0 +; GFX942-NEXT: v_mov_b32_e32 v1, 15 +; GFX942-NEXT: scratch_store_dword v0, v1, off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_endpgm ; ; GFX10-PAL-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset: ; GFX10-PAL: ; %bb.0: ; %bb @@ -5408,13 +5408,13 @@ define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addr ; GFX9-PAL-NEXT: global_store_dword v[0:1], v2, off ; GFX9-PAL-NEXT: s_endpgm ; -; GFX940-LABEL: sgpr_base_negative_offset: -; GFX940: ; %bb.0: ; %entry -; GFX940-NEXT: s_addk_i32 s0, 0xffe8 -; GFX940-NEXT: scratch_load_dword v2, off, s0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dword v[0:1], v2, off sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: sgpr_base_negative_offset: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_addk_i32 s0, 0xffe8 +; GFX942-NEXT: scratch_load_dword v2, off, s0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: global_store_dword v[0:1], v2, off +; GFX942-NEXT: s_endpgm ; ; GFX10-PAL-LABEL: sgpr_base_negative_offset: ; GFX10-PAL: ; %bb.0: ; %entry diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll index c26e2911ab3ea..66de7d535db4b 100644 --- a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll +++ b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX940 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX942 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s define float @v_fmaximum3_f32(float %a, float %b, float %c) { @@ -14,19 +14,19 @@ define float @v_fmaximum3_f32(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_f32: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v3, v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX940-NEXT: v_max_f32_e32 v1, v0, v2 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_f32: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v3, v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX942-NEXT: v_max_f32_e32 v1, v0, v2 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_f32: ; GFX950: ; %bb.0: @@ -49,19 +49,19 @@ define float @v_fmaximum3_f32_commute(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, v2, v0, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_f32_commute: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v3, v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX940-NEXT: v_max_f32_e32 v1, v2, v0 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_f32_commute: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v3, v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX942-NEXT: v_max_f32_e32 v1, v2, v0 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v2, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_f32_commute: ; GFX950: ; %bb.0: @@ -83,21 +83,21 @@ define amdgpu_ps i32 @s_fmaximum3_f32(float inreg %a, float inreg %b, float inre ; GFX12-NEXT: s_wait_alu 0xf1ff ; GFX12-NEXT: ; return to shader part epilog ; -; GFX940-LABEL: s_fmaximum3_f32: -; GFX940: ; %bb.0: -; GFX940-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NEXT: v_max_f32_e32 v1, s0, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX940-NEXT: v_max_f32_e32 v1, s2, v0 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s2, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_readfirstlane_b32 s0, v0 -; GFX940-NEXT: ; return to shader part epilog +; GFX942-LABEL: s_fmaximum3_f32: +; GFX942: ; %bb.0: +; GFX942-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NEXT: v_max_f32_e32 v1, s0, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX942-NEXT: v_max_f32_e32 v1, s2, v0 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, s2, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_readfirstlane_b32 s0, v0 +; GFX942-NEXT: ; return to shader part epilog ; ; GFX950-LABEL: s_fmaximum3_f32: ; GFX950: ; %bb.0: @@ -125,19 +125,19 @@ define float @v_fmaximum3_f32_fabs0(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, |v0|, v1, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_f32_fabs0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e64 v3, |v0|, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX940-NEXT: v_max_f32_e32 v1, v0, v2 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_f32_fabs0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e64 v3, |v0|, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, |v0|, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX942-NEXT: v_max_f32_e32 v1, v0, v2 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_f32_fabs0: ; GFX950: ; %bb.0: @@ -161,19 +161,19 @@ define float @v_fmaximum3_f32_fabs1(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, |v1|, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_f32_fabs1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e64 v3, v0, |v1| -; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v1| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX940-NEXT: v_max_f32_e32 v1, v0, v2 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_f32_fabs1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e64 v3, v0, |v1| +; GFX942-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v0, |v1| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX942-NEXT: v_max_f32_e32 v1, v0, v2 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_f32_fabs1: ; GFX950: ; %bb.0: @@ -197,19 +197,19 @@ define float @v_fmaximum3_f32_fabs2(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, |v2| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_f32_fabs2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v3, v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX940-NEXT: v_max_f32_e64 v1, v0, |v2| -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_f32_fabs2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v3, v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX942-NEXT: v_max_f32_e64 v1, v0, |v2| +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_f32_fabs2: ; GFX950: ; %bb.0: @@ -233,19 +233,19 @@ define float @v_fmaximum3_f32_fabs_all(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, |v0|, |v1|, |v2| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_f32_fabs_all: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e64 v3, |v0|, |v1| -; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v1| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX940-NEXT: v_max_f32_e64 v1, v0, |v2| -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_f32_fabs_all: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e64 v3, |v0|, |v1| +; GFX942-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v1| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX942-NEXT: v_max_f32_e64 v1, v0, |v2| +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_f32_fabs_all: ; GFX950: ; %bb.0: @@ -271,19 +271,19 @@ define float @v_fmaximum3_f32_fneg_all(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, -v0, -v1, -v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_f32_fneg_all: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e64 v3, -v0, -v1 -; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX940-NEXT: v_max_f32_e64 v1, v0, -v2 -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_f32_fneg_all: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e64 v3, -v0, -v1 +; GFX942-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX942-NEXT: v_max_f32_e64 v1, v0, -v2 +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_f32_fneg_all: ; GFX950: ; %bb.0: @@ -309,19 +309,19 @@ define float @v_fmaximum3_f32_fneg_fabs_all(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, -|v0|, -|v1|, -|v2| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_f32_fneg_fabs_all: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e64 v3, -|v0|, -|v1| -; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -|v0|, -|v1| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX940-NEXT: v_max_f32_e64 v1, v0, -|v2| -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -|v2| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_f32_fneg_fabs_all: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e64 v3, -|v0|, -|v1| +; GFX942-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, -|v0|, -|v1| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX942-NEXT: v_max_f32_e64 v1, v0, -|v2| +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v0, -|v2| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_f32_fneg_fabs_all: ; GFX950: ; %bb.0: @@ -350,19 +350,19 @@ define float @v_fmaximum3_f32_fneg0(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, -v0, v1, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_f32_fneg0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e64 v3, -v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX940-NEXT: v_max_f32_e32 v1, v0, v2 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_f32_fneg0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e64 v3, -v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, -v0, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX942-NEXT: v_max_f32_e32 v1, v0, v2 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_f32_fneg0: ; GFX950: ; %bb.0: @@ -386,19 +386,19 @@ define float @v_fmaximum3_f32_fneg1(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, -v1, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_f32_fneg1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e64 v3, v0, -v1 -; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX940-NEXT: v_max_f32_e32 v1, v0, v2 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_f32_fneg1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e64 v3, v0, -v1 +; GFX942-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v0, -v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX942-NEXT: v_max_f32_e32 v1, v0, v2 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_f32_fneg1: ; GFX950: ; %bb.0: @@ -422,19 +422,19 @@ define float @v_fmaximum3_f32_fneg2(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, -v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_f32_fneg2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v3, v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX940-NEXT: v_max_f32_e64 v1, v0, -v2 -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_f32_fneg2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v3, v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX942-NEXT: v_max_f32_e64 v1, v0, -v2 +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_f32_fneg2: ; GFX950: ; %bb.0: @@ -458,19 +458,19 @@ define float @v_fmaximum3_f32_const0(float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, 0x41000000, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_f32_const0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, 0x41000000, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX940-NEXT: v_max_f32_e32 v2, v0, v1 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_f32_const0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, 0x41000000, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX942-NEXT: v_max_f32_e32 v2, v0, v1 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_f32_const0: ; GFX950: ; %bb.0: @@ -494,19 +494,19 @@ define float @v_fmaximum3_f32__const2(float %a, float %b) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, 0x41000000 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_f32__const2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX940-NEXT: v_max_f32_e32 v1, 0x41000000, v0 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_f32__const2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX942-NEXT: v_max_f32_e32 v1, 0x41000000, v0 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_f32__const2: ; GFX950: ; %bb.0: @@ -530,19 +530,19 @@ define float @v_fmaximum3_f32_inlineimm0(float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, 4.0, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_f32_inlineimm0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, 4.0, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX940-NEXT: v_max_f32_e32 v2, v0, v1 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_f32_inlineimm0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, 4.0, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX942-NEXT: v_max_f32_e32 v2, v0, v1 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_f32_inlineimm0: ; GFX950: ; %bb.0: @@ -565,19 +565,19 @@ define float @v_fmaximum3_f32__inlineimm(float %a, float %b) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, 4.0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_f32__inlineimm: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX940-NEXT: v_max_f32_e32 v1, 4.0, v0 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_f32__inlineimm: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX942-NEXT: v_max_f32_e32 v1, 4.0, v0 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_f32__inlineimm: ; GFX950: ; %bb.0: @@ -602,19 +602,19 @@ define float @v_fmaximum3_f32_const1_const2(float %a) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, s0, 0x41800000 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_f32_const1_const2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v1, 0x41000000, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX940-NEXT: v_max_f32_e32 v1, 0x41800000, v0 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_f32_const1_const2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v1, 0x41000000, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX942-NEXT: v_max_f32_e32 v1, 0x41800000, v0 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_f32_const1_const2: ; GFX950: ; %bb.0: @@ -640,27 +640,27 @@ define <2 x float> @v_fmaximum3_v2f32(<2 x float> %a, <2 x float> %b, <2 x float ; GFX12-NEXT: v_maximum3_f32 v1, v5, v1, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_v2f32: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v6, v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v0, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc -; GFX940-NEXT: v_max_f32_e32 v2, v4, v0 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v4, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc -; GFX940-NEXT: v_max_f32_e32 v2, v5, v1 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v5, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_v2f32: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v6, v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v7, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v0, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc +; GFX942-NEXT: v_max_f32_e32 v2, v4, v0 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v4, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX942-NEXT: v_max_f32_e32 v2, v5, v1 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v5, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_v2f32: ; GFX950: ; %bb.0: @@ -685,27 +685,27 @@ define <2 x float> @v_fmaximum3_v2f32_commute(<2 x float> %a, <2 x float> %b, <2 ; GFX12-NEXT: v_maximum3_f32 v1, v1, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_v2f32_commute: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v6, v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v0, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc -; GFX940-NEXT: v_max_f32_e32 v2, v0, v4 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v4 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc -; GFX940-NEXT: v_max_f32_e32 v2, v1, v5 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_v2f32_commute: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v6, v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v7, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v0, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc +; GFX942-NEXT: v_max_f32_e32 v2, v0, v4 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v4 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX942-NEXT: v_max_f32_e32 v2, v1, v5 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_v2f32_commute: ; GFX950: ; %bb.0: @@ -730,27 +730,27 @@ define <2 x float> @v_fmaximum3_v2f32__fabs_all(<2 x float> %a, <2 x float> %b, ; GFX12-NEXT: v_maximum3_f32 v1, |v1|, |v3|, |v5| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_v2f32__fabs_all: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e64 v6, |v1|, |v3| -; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v3| -; GFX940-NEXT: v_max_f32_e64 v3, |v0|, |v2| -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v2| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc -; GFX940-NEXT: v_max_f32_e64 v2, v0, |v4| -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v4| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc -; GFX940-NEXT: v_max_f32_e64 v2, v1, |v5| -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v1, |v5| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_v2f32__fabs_all: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e64 v6, |v1|, |v3| +; GFX942-NEXT: v_mov_b32_e32 v7, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v3| +; GFX942-NEXT: v_max_f32_e64 v3, |v0|, |v2| +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v2| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc +; GFX942-NEXT: v_max_f32_e64 v2, v0, |v4| +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v0, |v4| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX942-NEXT: v_max_f32_e64 v2, v1, |v5| +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v1, |v5| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_v2f32__fabs_all: ; GFX950: ; %bb.0: @@ -778,27 +778,27 @@ define <2 x float> @v_fmaximum3_v2f32__fneg_all(<2 x float> %a, <2 x float> %b, ; GFX12-NEXT: v_maximum3_f32 v1, -v1, -v3, -v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_v2f32__fneg_all: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e64 v6, -v1, -v3 -; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v3 -; GFX940-NEXT: v_max_f32_e64 v3, -v0, -v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc -; GFX940-NEXT: v_max_f32_e64 v2, v0, -v4 -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v4 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc -; GFX940-NEXT: v_max_f32_e64 v2, v1, -v5 -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v1, -v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_v2f32__fneg_all: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e64 v6, -v1, -v3 +; GFX942-NEXT: v_mov_b32_e32 v7, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v3 +; GFX942-NEXT: v_max_f32_e64 v3, -v0, -v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc +; GFX942-NEXT: v_max_f32_e64 v2, v0, -v4 +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v0, -v4 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX942-NEXT: v_max_f32_e64 v2, v1, -v5 +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v1, -v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_v2f32__fneg_all: ; GFX950: ; %bb.0: @@ -826,27 +826,27 @@ define <2 x float> @v_fmaximum3_v2f32__inlineimm1(<2 x float> %a, <2 x float> %c ; GFX12-NEXT: v_maximum3_f32 v1, v1, 2.0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_v2f32__inlineimm1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v4, 2.0, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc -; GFX940-NEXT: v_max_f32_e32 v4, 2.0, v0 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc -; GFX940-NEXT: v_max_f32_e32 v4, v0, v2 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX940-NEXT: v_max_f32_e32 v2, v1, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_v2f32__inlineimm1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v4, 2.0, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc +; GFX942-NEXT: v_max_f32_e32 v4, 2.0, v0 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc +; GFX942-NEXT: v_max_f32_e32 v4, v0, v2 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX942-NEXT: v_max_f32_e32 v2, v1, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_v2f32__inlineimm1: ; GFX950: ; %bb.0: @@ -871,27 +871,27 @@ define <2 x float> @v_fmaximum3_v2f32__inlineimm2(<2 x float> %a, <2 x float> %b ; GFX12-NEXT: v_maximum3_f32 v1, v1, v3, 4.0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_v2f32__inlineimm2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v4, v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v0, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc -; GFX940-NEXT: v_max_f32_e32 v2, 4.0, v0 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc -; GFX940-NEXT: v_max_f32_e32 v2, 4.0, v1 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_v2f32__inlineimm2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v4, v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v0, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc +; GFX942-NEXT: v_max_f32_e32 v2, 4.0, v0 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc +; GFX942-NEXT: v_max_f32_e32 v2, 4.0, v1 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_v2f32__inlineimm2: ; GFX950: ; %bb.0: @@ -917,35 +917,35 @@ define <3 x float> @v_fmaximum3_v3f32(<3 x float> %a, <3 x float> %b, <3 x float ; GFX12-NEXT: v_maximum3_f32 v2, v8, v2, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_v3f32: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v9, v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v10, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 -; GFX940-NEXT: v_max_f32_e32 v5, v1, v4 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 -; GFX940-NEXT: v_max_f32_e32 v4, v0, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc -; GFX940-NEXT: v_max_f32_e32 v3, v6, v0 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v6, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc -; GFX940-NEXT: v_max_f32_e32 v3, v7, v1 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v7, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc -; GFX940-NEXT: v_max_f32_e32 v3, v8, v2 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v8, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_v3f32: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v9, v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v10, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 +; GFX942-NEXT: v_max_f32_e32 v5, v1, v4 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 +; GFX942-NEXT: v_max_f32_e32 v4, v0, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc +; GFX942-NEXT: v_max_f32_e32 v3, v6, v0 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v6, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc +; GFX942-NEXT: v_max_f32_e32 v3, v7, v1 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v7, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc +; GFX942-NEXT: v_max_f32_e32 v3, v8, v2 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v8, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_v3f32: ; GFX950: ; %bb.0: @@ -972,35 +972,35 @@ define <3 x float> @v_fmaximum3_v3f32_commute(<3 x float> %a, <3 x float> %b, <3 ; GFX12-NEXT: v_maximum3_f32 v2, v2, v5, v8 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_v3f32_commute: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v9, v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v10, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 -; GFX940-NEXT: v_max_f32_e32 v5, v1, v4 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 -; GFX940-NEXT: v_max_f32_e32 v4, v0, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc -; GFX940-NEXT: v_max_f32_e32 v3, v0, v6 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v6 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc -; GFX940-NEXT: v_max_f32_e32 v3, v1, v7 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v7 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc -; GFX940-NEXT: v_max_f32_e32 v3, v2, v8 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v8 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_v3f32_commute: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v9, v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v10, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 +; GFX942-NEXT: v_max_f32_e32 v5, v1, v4 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 +; GFX942-NEXT: v_max_f32_e32 v4, v0, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc +; GFX942-NEXT: v_max_f32_e32 v3, v0, v6 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v6 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc +; GFX942-NEXT: v_max_f32_e32 v3, v1, v7 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v7 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc +; GFX942-NEXT: v_max_f32_e32 v3, v2, v8 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v2, v8 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_v3f32_commute: ; GFX950: ; %bb.0: @@ -1027,35 +1027,35 @@ define <3 x float> @v_fmaximum3_v3f32__fabs_all(<3 x float> %a, <3 x float> %b, ; GFX12-NEXT: v_maximum3_f32 v2, |v2|, |v5|, |v8| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_v3f32__fabs_all: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e64 v9, |v2|, |v5| -; GFX940-NEXT: v_mov_b32_e32 v10, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v2|, |v5| -; GFX940-NEXT: v_max_f32_e64 v5, |v1|, |v4| -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v4| -; GFX940-NEXT: v_max_f32_e64 v4, |v0|, |v3| -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v3| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc -; GFX940-NEXT: v_max_f32_e64 v3, v0, |v6| -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v6| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc -; GFX940-NEXT: v_max_f32_e64 v3, v1, |v7| -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v1, |v7| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc -; GFX940-NEXT: v_max_f32_e64 v3, v2, |v8| -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v2, |v8| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_v3f32__fabs_all: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e64 v9, |v2|, |v5| +; GFX942-NEXT: v_mov_b32_e32 v10, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, |v2|, |v5| +; GFX942-NEXT: v_max_f32_e64 v5, |v1|, |v4| +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v4| +; GFX942-NEXT: v_max_f32_e64 v4, |v0|, |v3| +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v3| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc +; GFX942-NEXT: v_max_f32_e64 v3, v0, |v6| +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v0, |v6| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc +; GFX942-NEXT: v_max_f32_e64 v3, v1, |v7| +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v1, |v7| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc +; GFX942-NEXT: v_max_f32_e64 v3, v2, |v8| +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v2, |v8| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_v3f32__fabs_all: ; GFX950: ; %bb.0: @@ -1085,35 +1085,35 @@ define <3 x float> @v_fmaximum3_v3f32__fneg_all(<3 x float> %a, <3 x float> %b, ; GFX12-NEXT: v_maximum3_f32 v2, -v2, -v5, -v8 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_v3f32__fneg_all: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e64 v9, -v2, -v5 -; GFX940-NEXT: v_mov_b32_e32 v10, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v2, -v5 -; GFX940-NEXT: v_max_f32_e64 v5, -v1, -v4 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v4 -; GFX940-NEXT: v_max_f32_e64 v4, -v0, -v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc -; GFX940-NEXT: v_max_f32_e64 v3, v0, -v6 -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v6 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc -; GFX940-NEXT: v_max_f32_e64 v3, v1, -v7 -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v1, -v7 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc -; GFX940-NEXT: v_max_f32_e64 v3, v2, -v8 -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v2, -v8 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_v3f32__fneg_all: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e64 v9, -v2, -v5 +; GFX942-NEXT: v_mov_b32_e32 v10, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, -v2, -v5 +; GFX942-NEXT: v_max_f32_e64 v5, -v1, -v4 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v4 +; GFX942-NEXT: v_max_f32_e64 v4, -v0, -v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc +; GFX942-NEXT: v_max_f32_e64 v3, v0, -v6 +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v0, -v6 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc +; GFX942-NEXT: v_max_f32_e64 v3, v1, -v7 +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v1, -v7 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc +; GFX942-NEXT: v_max_f32_e64 v3, v2, -v8 +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v2, -v8 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_v3f32__fneg_all: ; GFX950: ; %bb.0: @@ -1143,35 +1143,35 @@ define <3 x float> @v_fmaximum3_v3f32__inlineimm1(<3 x float> %a, <3 x float> %c ; GFX12-NEXT: v_maximum3_f32 v2, v2, 2.0, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_v3f32__inlineimm1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v6, 2.0, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc -; GFX940-NEXT: v_max_f32_e32 v6, 2.0, v1 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc -; GFX940-NEXT: v_max_f32_e32 v6, 2.0, v0 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc -; GFX940-NEXT: v_max_f32_e32 v6, v0, v3 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v1, v4 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc -; GFX940-NEXT: v_max_f32_e32 v3, v2, v5 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_v3f32__inlineimm1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v6, 2.0, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc +; GFX942-NEXT: v_max_f32_e32 v6, 2.0, v1 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc +; GFX942-NEXT: v_max_f32_e32 v6, 2.0, v0 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc +; GFX942-NEXT: v_max_f32_e32 v6, v0, v3 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v1, v4 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc +; GFX942-NEXT: v_max_f32_e32 v3, v2, v5 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_v3f32__inlineimm1: ; GFX950: ; %bb.0: @@ -1198,35 +1198,35 @@ define <3 x float> @v_fmaximum3_v3f32__inlineimm2(<3 x float> %a, <3 x float> %b ; GFX12-NEXT: v_maximum3_f32 v2, v2, v5, 4.0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_v3f32__inlineimm2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v6, v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 -; GFX940-NEXT: v_max_f32_e32 v5, v1, v4 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 -; GFX940-NEXT: v_max_f32_e32 v4, v0, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v4, vcc -; GFX940-NEXT: v_max_f32_e32 v3, 4.0, v0 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc -; GFX940-NEXT: v_max_f32_e32 v3, 4.0, v1 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc -; GFX940-NEXT: v_max_f32_e32 v3, 4.0, v2 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_v3f32__inlineimm2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v6, v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v7, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 +; GFX942-NEXT: v_max_f32_e32 v5, v1, v4 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 +; GFX942-NEXT: v_max_f32_e32 v4, v0, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v4, vcc +; GFX942-NEXT: v_max_f32_e32 v3, 4.0, v0 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc +; GFX942-NEXT: v_max_f32_e32 v3, 4.0, v1 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc +; GFX942-NEXT: v_max_f32_e32 v3, 4.0, v2 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_v3f32__inlineimm2: ; GFX950: ; %bb.0: @@ -1774,30 +1774,30 @@ define <2 x half> @v_fmaximum3_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c ; GFX12-NEXT: v_pk_maximum_f16 v0, v2, v0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_v2f16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v3, v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, 0x7e00 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX940-NEXT: v_perm_b32 v1, v0, v5, s0 -; GFX940-NEXT: v_pk_max_f16 v1, v2, v1 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v2, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v2, v0 src0_sel:WORD_1 src1_sel:DWORD -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_v2f16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v3, v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, 0x7e00 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX942-NEXT: v_perm_b32 v1, v0, v5, s0 +; GFX942-NEXT: v_pk_max_f16 v1, v2, v1 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v2, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v2, v0 src0_sel:WORD_1 src1_sel:DWORD +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_v2f16: ; GFX950: ; %bb.0: @@ -1824,30 +1824,30 @@ define <2 x half> @v_fmaximum3_v2f16_commute(<2 x half> %a, <2 x half> %b, <2 x ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_v2f16_commute: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v3, v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, 0x7e00 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX940-NEXT: v_perm_b32 v1, v0, v5, s0 -; GFX940-NEXT: v_pk_max_f16 v1, v1, v2 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v5, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_v2f16_commute: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v3, v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, 0x7e00 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX942-NEXT: v_perm_b32 v1, v0, v5, s0 +; GFX942-NEXT: v_pk_max_f16 v1, v1, v2 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v5, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_v2f16_commute: ; GFX950: ; %bb.0: @@ -1877,32 +1877,32 @@ define <2 x half> @v_fmaximum3_v2f16__fabs_all(<2 x half> %a, <2 x half> %b, <2 ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_v2f16__fabs_all: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v0 -; GFX940-NEXT: v_and_b32_e32 v4, 0x7fff7fff, v1 -; GFX940-NEXT: v_pk_max_f16 v3, v3, v4 -; GFX940-NEXT: v_mov_b32_e32 v6, 0x7e00 -; GFX940-NEXT: v_lshrrev_b32_e32 v4, 16, v3 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, |v0|, |v1| src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: v_and_b32_e32 v5, 0x7fff7fff, v2 -; GFX940-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, |v0|, |v1| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v6, v3, vcc -; GFX940-NEXT: v_perm_b32 v1, v4, v0, s0 -; GFX940-NEXT: v_pk_max_f16 v1, v1, v5 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v4, |v2| src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v1 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, v0, |v2| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v6, v1, vcc -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_v2f16__fabs_all: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v0 +; GFX942-NEXT: v_and_b32_e32 v4, 0x7fff7fff, v1 +; GFX942-NEXT: v_pk_max_f16 v3, v3, v4 +; GFX942-NEXT: v_mov_b32_e32 v6, 0x7e00 +; GFX942-NEXT: v_lshrrev_b32_e32 v4, 16, v3 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, |v0|, |v1| src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: v_and_b32_e32 v5, 0x7fff7fff, v2 +; GFX942-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, |v0|, |v1| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v6, v3, vcc +; GFX942-NEXT: v_perm_b32 v1, v4, v0, s0 +; GFX942-NEXT: v_pk_max_f16 v1, v1, v5 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v4, |v2| src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, v0, |v2| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v6, v1, vcc +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_v2f16__fabs_all: ; GFX950: ; %bb.0: @@ -1935,30 +1935,30 @@ define <2 x half> @v_fmaximum3_v2f16__fneg_all(<2 x half> %a, <2 x half> %b, <2 ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_v2f16__fneg_all: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v3, v0, v1 neg_lo:[1,1] neg_hi:[1,1] -; GFX940-NEXT: v_mov_b32_e32 v4, 0x7e00 -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, -v0, -v1 -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, -v0, -v1 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX940-NEXT: v_perm_b32 v1, v0, v5, s0 -; GFX940-NEXT: v_pk_max_f16 v1, v1, v2 neg_lo:[0,1] neg_hi:[0,1] -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, v5, -v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, -v2 src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_v2f16__fneg_all: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v3, v0, v1 neg_lo:[1,1] neg_hi:[1,1] +; GFX942-NEXT: v_mov_b32_e32 v4, 0x7e00 +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, -v0, -v1 +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, -v0, -v1 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX942-NEXT: v_perm_b32 v1, v0, v5, s0 +; GFX942-NEXT: v_pk_max_f16 v1, v1, v2 neg_lo:[0,1] neg_hi:[0,1] +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, v5, -v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, -v2 src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_v2f16__fneg_all: ; GFX950: ; %bb.0: @@ -1988,30 +1988,30 @@ define <2 x half> @v_fmaximum3_v2f16__inlineimm1(<2 x half> %a, <2 x half> %c) { ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_v2f16__inlineimm1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v2, v0, 2.0 op_sel_hi:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, 0x7e00 -; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc -; GFX940-NEXT: v_perm_b32 v2, v3, v0, s0 -; GFX940-NEXT: v_pk_max_f16 v2, v2, v1 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v3, v1 src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_lshrrev_b32_e32 v5, 16, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_v2f16__inlineimm1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v2, v0, 2.0 op_sel_hi:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, 0x7e00 +; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc +; GFX942-NEXT: v_perm_b32 v2, v3, v0, s0 +; GFX942-NEXT: v_pk_max_f16 v2, v2, v1 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v3, v1 src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_lshrrev_b32_e32 v5, 16, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_v2f16__inlineimm1: ; GFX950: ; %bb.0: @@ -2038,30 +2038,30 @@ define <2 x half> @v_fmaximum3_v2f16__inlineimm2(<2 x half> %a, <2 x half> %b) { ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, 4.0 op_sel_hi:[1,0] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_v2f16__inlineimm2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v2, v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, 0x7e00 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX940-NEXT: v_perm_b32 v1, v0, v4, s0 -; GFX940-NEXT: v_pk_max_f16 v1, v1, 4.0 op_sel_hi:[1,0] -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v4, v4 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc -; GFX940-NEXT: v_perm_b32 v0, v0, v2, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_v2f16__inlineimm2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v2, v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, 0x7e00 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX942-NEXT: v_perm_b32 v1, v0, v4, s0 +; GFX942-NEXT: v_pk_max_f16 v1, v1, 4.0 op_sel_hi:[1,0] +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v4, v4 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc +; GFX942-NEXT: v_perm_b32 v0, v0, v2, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_v2f16__inlineimm2: ; GFX950: ; %bb.0: @@ -2090,42 +2090,42 @@ define <3 x half> @v_fmaximum3_v3f16(<3 x half> %a, <3 x half> %b, <3 x half> %c ; GFX12-NEXT: v_pk_maximum_f16 v1, v5, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_v3f16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v6, v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, 0x7e00 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v6, 16, v6 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: v_pk_max_f16 v2, v1, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX940-NEXT: v_perm_b32 v1, v1, v6, s0 -; GFX940-NEXT: v_pk_max_f16 v1, v5, v1 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v5, v6 -; GFX940-NEXT: v_perm_b32 v2, v0, v8, s0 -; GFX940-NEXT: v_pk_max_f16 v2, v4, v2 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v4, v8 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_v3f16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v6, v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, 0x7e00 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v6, 16, v6 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: v_pk_max_f16 v2, v1, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX942-NEXT: v_perm_b32 v1, v1, v6, s0 +; GFX942-NEXT: v_pk_max_f16 v1, v5, v1 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v5, v6 +; GFX942-NEXT: v_perm_b32 v2, v0, v8, s0 +; GFX942-NEXT: v_pk_max_f16 v2, v4, v2 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v4, v8 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_v3f16: ; GFX950: ; %bb.0: @@ -2155,42 +2155,42 @@ define <3 x half> @v_fmaximum3_v3f16_commute(<3 x half> %a, <3 x half> %b, <3 x ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_v3f16_commute: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v6, v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, 0x7e00 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v6, 16, v6 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: v_pk_max_f16 v2, v1, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX940-NEXT: v_perm_b32 v1, v1, v6, s0 -; GFX940-NEXT: v_pk_max_f16 v1, v1, v5 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v6, v5 -; GFX940-NEXT: v_perm_b32 v2, v0, v8, s0 -; GFX940-NEXT: v_pk_max_f16 v2, v2, v4 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v8, v4 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_v3f16_commute: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v6, v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, 0x7e00 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v6, 16, v6 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: v_pk_max_f16 v2, v1, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX942-NEXT: v_perm_b32 v1, v1, v6, s0 +; GFX942-NEXT: v_pk_max_f16 v1, v1, v5 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v6, v5 +; GFX942-NEXT: v_perm_b32 v2, v0, v8, s0 +; GFX942-NEXT: v_pk_max_f16 v2, v2, v4 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v8, v4 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_v3f16_commute: ; GFX950: ; %bb.0: @@ -2227,46 +2227,46 @@ define <3 x half> @v_fmaximum3_v3f16__fabs_all(<3 x half> %a, <3 x half> %b, <3 ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_v3f16__fabs_all: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_and_b32_e32 v7, 0x7fff7fff, v1 -; GFX940-NEXT: v_and_b32_e32 v9, 0x7fff7fff, v3 -; GFX940-NEXT: v_and_b32_e32 v6, 0x7fff7fff, v0 -; GFX940-NEXT: v_and_b32_e32 v8, 0x7fff7fff, v2 -; GFX940-NEXT: v_pk_max_f16 v7, v7, v9 -; GFX940-NEXT: v_mov_b32_e32 v12, 0x7e00 -; GFX940-NEXT: v_lshrrev_b32_e32 v9, 16, v7 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: v_pk_max_f16 v6, v6, v8 -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: v_cndmask_b32_e32 v9, v12, v9, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v8, 16, v6 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: v_and_b32_e32 v11, 0x7fff7fff, v4 -; GFX940-NEXT: v_and_b32_e32 v10, 0x7fff7fff, v5 -; GFX940-NEXT: v_cndmask_b32_e32 v8, v12, v8, vcc -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, |v1|, |v3| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v12, v7, vcc -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, |v0|, |v2| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v12, v6, vcc -; GFX940-NEXT: v_perm_b32 v2, v8, v0, s0 -; GFX940-NEXT: v_pk_max_f16 v2, v2, v11 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v8, |v4| src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX940-NEXT: v_perm_b32 v6, v9, v1, s0 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc -; GFX940-NEXT: v_pk_max_f16 v6, v6, v10 -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, v1, |v5| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v12, v6, vcc -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, v0, |v4| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v12, v2, vcc -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_v3f16__fabs_all: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_and_b32_e32 v7, 0x7fff7fff, v1 +; GFX942-NEXT: v_and_b32_e32 v9, 0x7fff7fff, v3 +; GFX942-NEXT: v_and_b32_e32 v6, 0x7fff7fff, v0 +; GFX942-NEXT: v_and_b32_e32 v8, 0x7fff7fff, v2 +; GFX942-NEXT: v_pk_max_f16 v7, v7, v9 +; GFX942-NEXT: v_mov_b32_e32 v12, 0x7e00 +; GFX942-NEXT: v_lshrrev_b32_e32 v9, 16, v7 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: v_pk_max_f16 v6, v6, v8 +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: v_cndmask_b32_e32 v9, v12, v9, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v8, 16, v6 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: v_and_b32_e32 v11, 0x7fff7fff, v4 +; GFX942-NEXT: v_and_b32_e32 v10, 0x7fff7fff, v5 +; GFX942-NEXT: v_cndmask_b32_e32 v8, v12, v8, vcc +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, |v1|, |v3| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v12, v7, vcc +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, |v0|, |v2| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v12, v6, vcc +; GFX942-NEXT: v_perm_b32 v2, v8, v0, s0 +; GFX942-NEXT: v_pk_max_f16 v2, v2, v11 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v8, |v4| src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX942-NEXT: v_perm_b32 v6, v9, v1, s0 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc +; GFX942-NEXT: v_pk_max_f16 v6, v6, v10 +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, v1, |v5| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v12, v6, vcc +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, v0, |v4| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v12, v2, vcc +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_v3f16__fabs_all: ; GFX950: ; %bb.0: @@ -2305,42 +2305,42 @@ define <3 x half> @v_fmaximum3_v3f16__fneg_all(<3 x half> %a, <3 x half> %b, <3 ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_v3f16__fneg_all: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1] -; GFX940-NEXT: v_mov_b32_e32 v7, 0x7e00 -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, -v0, -v2 -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v6, 16, v6 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: v_pk_max_f16 v2, v1, v3 neg_lo:[1,1] neg_hi:[1,1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, -v1, -v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX940-NEXT: v_perm_b32 v1, v1, v6, s0 -; GFX940-NEXT: v_pk_max_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1] -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, v6, -v5 -; GFX940-NEXT: v_perm_b32 v2, v0, v8, s0 -; GFX940-NEXT: v_pk_max_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1] -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, v8, -v4 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_v3f16__fneg_all: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1] +; GFX942-NEXT: v_mov_b32_e32 v7, 0x7e00 +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, -v0, -v2 +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v6, 16, v6 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: v_pk_max_f16 v2, v1, v3 neg_lo:[1,1] neg_hi:[1,1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, -v1, -v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX942-NEXT: v_perm_b32 v1, v1, v6, s0 +; GFX942-NEXT: v_pk_max_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1] +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, v6, -v5 +; GFX942-NEXT: v_perm_b32 v2, v0, v8, s0 +; GFX942-NEXT: v_pk_max_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1] +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, v8, -v4 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_v3f16__fneg_all: ; GFX950: ; %bb.0: @@ -2373,39 +2373,39 @@ define <3 x half> @v_fmaximum3_v3f16__inlineimm1(<3 x half> %a, <3 x half> %c) { ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_v3f16__inlineimm1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v4, v0, 2.0 op_sel_hi:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v6, 0x7e00 -; GFX940-NEXT: v_lshrrev_b32_e32 v5, 16, v4 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: v_pk_max_f16 v7, v1, 2.0 -; GFX940-NEXT: s_mov_b32 s1, 0x5040100 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v1 -; GFX940-NEXT: s_movk_i32 s0, 0x7e00 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v6, v7, vcc -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc -; GFX940-NEXT: v_perm_b32 v4, v5, v0, s1 -; GFX940-NEXT: v_pk_max_f16 v4, v4, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v5, v2 src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_lshrrev_b32_e32 v7, 16, v4 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc -; GFX940-NEXT: v_pack_b32_f16 v7, v1, s0 -; GFX940-NEXT: v_pk_max_f16 v7, v7, v3 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v6, v7, vcc -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc -; GFX940-NEXT: v_perm_b32 v0, v5, v0, s1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_v3f16__inlineimm1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v4, v0, 2.0 op_sel_hi:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v6, 0x7e00 +; GFX942-NEXT: v_lshrrev_b32_e32 v5, 16, v4 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: v_pk_max_f16 v7, v1, 2.0 +; GFX942-NEXT: s_mov_b32 s1, 0x5040100 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v1, v1 +; GFX942-NEXT: s_movk_i32 s0, 0x7e00 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v6, v7, vcc +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc +; GFX942-NEXT: v_perm_b32 v4, v5, v0, s1 +; GFX942-NEXT: v_pk_max_f16 v4, v4, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v5, v2 src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_lshrrev_b32_e32 v7, 16, v4 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc +; GFX942-NEXT: v_pack_b32_f16 v7, v1, s0 +; GFX942-NEXT: v_pk_max_f16 v7, v7, v3 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v6, v7, vcc +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc +; GFX942-NEXT: v_perm_b32 v0, v5, v0, s1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_v3f16__inlineimm1: ; GFX950: ; %bb.0: @@ -2435,42 +2435,42 @@ define <3 x half> @v_fmaximum3_v3f16__inlineimm2(<3 x half> %a, <3 x half> %b) { ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, 4.0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_v3f16__inlineimm2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v4, v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, 0x7e00 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v4, 16, v4 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: v_pk_max_f16 v2, v1, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc -; GFX940-NEXT: v_perm_b32 v1, v1, v4, s0 -; GFX940-NEXT: v_pk_max_f16 v1, v1, 4.0 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v4, v4 -; GFX940-NEXT: v_perm_b32 v2, v0, v6, s0 -; GFX940-NEXT: v_pk_max_f16 v2, v2, 4.0 op_sel_hi:[1,0] -; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v6, v6 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_v3f16__inlineimm2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v4, v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, 0x7e00 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v4, 16, v4 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: v_pk_max_f16 v2, v1, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v4, v5, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc +; GFX942-NEXT: v_perm_b32 v1, v1, v4, s0 +; GFX942-NEXT: v_pk_max_f16 v1, v1, 4.0 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v4, v4 +; GFX942-NEXT: v_perm_b32 v2, v0, v6, s0 +; GFX942-NEXT: v_pk_max_f16 v2, v2, 4.0 op_sel_hi:[1,0] +; GFX942-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v6, v6 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_v3f16__inlineimm2: ; GFX950: ; %bb.0: @@ -2500,48 +2500,48 @@ define <4 x half> @v_fmaximum3_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c ; GFX12-NEXT: v_pk_maximum_f16 v1, v5, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_v4f16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v6, v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, 0x7e00 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v6, 16, v6 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: v_pk_max_f16 v2, v1, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX940-NEXT: v_perm_b32 v2, v1, v6, s0 -; GFX940-NEXT: v_pk_max_f16 v2, v5, v2 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v5, v6 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v5, v1 src0_sel:WORD_1 src1_sel:DWORD -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX940-NEXT: v_perm_b32 v2, v0, v8, s0 -; GFX940-NEXT: v_pk_max_f16 v2, v4, v2 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v4, v8 -; GFX940-NEXT: v_perm_b32 v1, v1, v3, s0 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc -; GFX940-NEXT: v_perm_b32 v0, v0, v5, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_v4f16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v6, v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, 0x7e00 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v6, 16, v6 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: v_pk_max_f16 v2, v1, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX942-NEXT: v_perm_b32 v2, v1, v6, s0 +; GFX942-NEXT: v_pk_max_f16 v2, v5, v2 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v5, v6 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v5, v1 src0_sel:WORD_1 src1_sel:DWORD +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX942-NEXT: v_perm_b32 v2, v0, v8, s0 +; GFX942-NEXT: v_pk_max_f16 v2, v4, v2 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v4, v8 +; GFX942-NEXT: v_perm_b32 v1, v1, v3, s0 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v7, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX942-NEXT: v_perm_b32 v0, v0, v5, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_v4f16: ; GFX950: ; %bb.0: @@ -2571,48 +2571,48 @@ define <4 x half> @v_fmaximum3_v4f16_commute(<4 x half> %a, <4 x half> %b, <4 x ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_v4f16_commute: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v6, v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, 0x7e00 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v6, 16, v6 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: v_pk_max_f16 v2, v1, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX940-NEXT: v_perm_b32 v2, v1, v6, s0 -; GFX940-NEXT: v_pk_max_f16 v2, v2, v5 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v6, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX940-NEXT: v_perm_b32 v2, v0, v8, s0 -; GFX940-NEXT: v_pk_max_f16 v2, v2, v4 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v8, v4 -; GFX940-NEXT: v_perm_b32 v1, v1, v3, s0 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc -; GFX940-NEXT: v_perm_b32 v0, v0, v5, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_v4f16_commute: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v6, v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, 0x7e00 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v6, 16, v6 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: v_pk_max_f16 v2, v1, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX942-NEXT: v_perm_b32 v2, v1, v6, s0 +; GFX942-NEXT: v_pk_max_f16 v2, v2, v5 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v6, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX942-NEXT: v_perm_b32 v2, v0, v8, s0 +; GFX942-NEXT: v_pk_max_f16 v2, v2, v4 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v8, v4 +; GFX942-NEXT: v_perm_b32 v1, v1, v3, s0 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v7, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX942-NEXT: v_perm_b32 v0, v0, v5, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_v4f16_commute: ; GFX950: ; %bb.0: @@ -2649,52 +2649,52 @@ define <4 x half> @v_fmaximum3_v4f16__fabs_all(<4 x half> %a, <4 x half> %b, <4 ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_v4f16__fabs_all: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_and_b32_e32 v7, 0x7fff7fff, v0 -; GFX940-NEXT: v_and_b32_e32 v9, 0x7fff7fff, v2 -; GFX940-NEXT: v_and_b32_e32 v6, 0x7fff7fff, v1 -; GFX940-NEXT: v_and_b32_e32 v8, 0x7fff7fff, v3 -; GFX940-NEXT: v_pk_max_f16 v7, v7, v9 -; GFX940-NEXT: v_mov_b32_e32 v12, 0x7e00 -; GFX940-NEXT: v_lshrrev_b32_e32 v9, 16, v7 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: v_pk_max_f16 v6, v6, v8 -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: v_cndmask_b32_e32 v9, v12, v9, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v8, 16, v6 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: v_and_b32_e32 v11, 0x7fff7fff, v5 -; GFX940-NEXT: v_and_b32_e32 v10, 0x7fff7fff, v4 -; GFX940-NEXT: v_cndmask_b32_e32 v8, v12, v8, vcc -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, |v0|, |v2| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v12, v7, vcc -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, |v1|, |v3| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v12, v6, vcc -; GFX940-NEXT: v_perm_b32 v2, v8, v1, s0 -; GFX940-NEXT: v_pk_max_f16 v2, v2, v11 -; GFX940-NEXT: v_perm_b32 v6, v9, v0, s0 -; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v8, |v5| src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_pk_max_f16 v6, v6, v10 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v7, 16, v6 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v9, |v4| src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v7, v12, v7, vcc -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, v1, |v5| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v12, v2, vcc -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, v0, |v4| -; GFX940-NEXT: v_perm_b32 v1, v3, v1, s0 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v12, v6, vcc -; GFX940-NEXT: v_perm_b32 v0, v7, v0, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_v4f16__fabs_all: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_and_b32_e32 v7, 0x7fff7fff, v0 +; GFX942-NEXT: v_and_b32_e32 v9, 0x7fff7fff, v2 +; GFX942-NEXT: v_and_b32_e32 v6, 0x7fff7fff, v1 +; GFX942-NEXT: v_and_b32_e32 v8, 0x7fff7fff, v3 +; GFX942-NEXT: v_pk_max_f16 v7, v7, v9 +; GFX942-NEXT: v_mov_b32_e32 v12, 0x7e00 +; GFX942-NEXT: v_lshrrev_b32_e32 v9, 16, v7 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: v_pk_max_f16 v6, v6, v8 +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: v_cndmask_b32_e32 v9, v12, v9, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v8, 16, v6 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: v_and_b32_e32 v11, 0x7fff7fff, v5 +; GFX942-NEXT: v_and_b32_e32 v10, 0x7fff7fff, v4 +; GFX942-NEXT: v_cndmask_b32_e32 v8, v12, v8, vcc +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, |v0|, |v2| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v12, v7, vcc +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, |v1|, |v3| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v12, v6, vcc +; GFX942-NEXT: v_perm_b32 v2, v8, v1, s0 +; GFX942-NEXT: v_pk_max_f16 v2, v2, v11 +; GFX942-NEXT: v_perm_b32 v6, v9, v0, s0 +; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v8, |v5| src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_pk_max_f16 v6, v6, v10 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v7, 16, v6 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v9, |v4| src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v7, v12, v7, vcc +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, v1, |v5| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v12, v2, vcc +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, v0, |v4| +; GFX942-NEXT: v_perm_b32 v1, v3, v1, s0 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v12, v6, vcc +; GFX942-NEXT: v_perm_b32 v0, v7, v0, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_v4f16__fabs_all: ; GFX950: ; %bb.0: @@ -2733,48 +2733,48 @@ define <4 x half> @v_fmaximum3_v4f16__fneg_all(<4 x half> %a, <4 x half> %b, <4 ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_v4f16__fneg_all: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1] -; GFX940-NEXT: v_mov_b32_e32 v7, 0x7e00 -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, -v0, -v2 -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v6, 16, v6 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: v_pk_max_f16 v2, v1, v3 neg_lo:[1,1] neg_hi:[1,1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, -v1, -v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX940-NEXT: v_perm_b32 v2, v1, v6, s0 -; GFX940-NEXT: v_pk_max_f16 v2, v2, v5 neg_lo:[0,1] neg_hi:[0,1] -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, v6, -v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, -v5 src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX940-NEXT: v_perm_b32 v2, v0, v8, s0 -; GFX940-NEXT: v_pk_max_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1] -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, v8, -v4 -; GFX940-NEXT: v_perm_b32 v1, v1, v3, s0 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc -; GFX940-NEXT: v_perm_b32 v0, v0, v5, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_v4f16__fneg_all: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1] +; GFX942-NEXT: v_mov_b32_e32 v7, 0x7e00 +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, -v0, -v2 +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v6, 16, v6 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: v_pk_max_f16 v2, v1, v3 neg_lo:[1,1] neg_hi:[1,1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, -v1, -v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX942-NEXT: v_perm_b32 v2, v1, v6, s0 +; GFX942-NEXT: v_pk_max_f16 v2, v2, v5 neg_lo:[0,1] neg_hi:[0,1] +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, v6, -v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v1, -v5 src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX942-NEXT: v_perm_b32 v2, v0, v8, s0 +; GFX942-NEXT: v_pk_max_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1] +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, v8, -v4 +; GFX942-NEXT: v_perm_b32 v1, v1, v3, s0 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v7, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX942-NEXT: v_perm_b32 v0, v0, v5, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_v4f16__fneg_all: ; GFX950: ; %bb.0: @@ -2807,46 +2807,46 @@ define <4 x half> @v_fmaximum3_v4f16__inlineimm1(<4 x half> %a, <4 x half> %c) { ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_v4f16__inlineimm1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v4, v0, 2.0 op_sel_hi:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v6, 0x7e00 -; GFX940-NEXT: v_lshrrev_b32_e32 v5, 16, v4 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: v_pk_max_f16 v7, v1, 2.0 op_sel_hi:[1,0] -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v8, 16, v7 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v1 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v8, v6, v8, vcc -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v6, v7, vcc -; GFX940-NEXT: v_perm_b32 v4, v8, v1, s0 -; GFX940-NEXT: v_pk_max_f16 v4, v4, v3 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v8, v3 src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_perm_b32 v8, v5, v0, s0 -; GFX940-NEXT: v_lshrrev_b32_e32 v7, 16, v4 -; GFX940-NEXT: v_pk_max_f16 v8, v8, v2 -; GFX940-NEXT: v_cndmask_b32_e32 v7, v6, v7, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v9, 16, v8 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v5, v2 src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v9, vcc -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v6, v4, vcc -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 -; GFX940-NEXT: v_perm_b32 v1, v7, v1, s0 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v6, v8, vcc -; GFX940-NEXT: v_perm_b32 v0, v5, v0, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_v4f16__inlineimm1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v4, v0, 2.0 op_sel_hi:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v6, 0x7e00 +; GFX942-NEXT: v_lshrrev_b32_e32 v5, 16, v4 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: v_pk_max_f16 v7, v1, 2.0 op_sel_hi:[1,0] +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v8, 16, v7 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v1, v1 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v8, v6, v8, vcc +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v1, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v6, v7, vcc +; GFX942-NEXT: v_perm_b32 v4, v8, v1, s0 +; GFX942-NEXT: v_pk_max_f16 v4, v4, v3 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v8, v3 src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_perm_b32 v8, v5, v0, s0 +; GFX942-NEXT: v_lshrrev_b32_e32 v7, 16, v4 +; GFX942-NEXT: v_pk_max_f16 v8, v8, v2 +; GFX942-NEXT: v_cndmask_b32_e32 v7, v6, v7, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v9, 16, v8 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v5, v2 src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v9, vcc +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v6, v4, vcc +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 +; GFX942-NEXT: v_perm_b32 v1, v7, v1, s0 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v6, v8, vcc +; GFX942-NEXT: v_perm_b32 v0, v5, v0, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_v4f16__inlineimm1: ; GFX950: ; %bb.0: @@ -2876,48 +2876,48 @@ define <4 x half> @v_fmaximum3_v4f16__inlineimm2(<4 x half> %a, <4 x half> %b) { ; GFX12-NEXT: v_pk_maximum_f16 v1, v1, 4.0 op_sel_hi:[1,0] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fmaximum3_v4f16__inlineimm2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v4, v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, 0x7e00 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v4, 16, v4 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: v_pk_max_f16 v2, v1, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc -; GFX940-NEXT: v_perm_b32 v2, v1, v4, s0 -; GFX940-NEXT: v_pk_max_f16 v2, v2, 4.0 op_sel_hi:[1,0] -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v4, v4 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc -; GFX940-NEXT: v_perm_b32 v2, v0, v6, s0 -; GFX940-NEXT: v_pk_max_f16 v2, v2, 4.0 op_sel_hi:[1,0] -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v6, v6 -; GFX940-NEXT: v_perm_b32 v1, v1, v3, s0 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc -; GFX940-NEXT: v_perm_b32 v0, v0, v4, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fmaximum3_v4f16__inlineimm2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v4, v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, 0x7e00 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v4, 16, v4 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: v_pk_max_f16 v2, v1, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v4, v5, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc +; GFX942-NEXT: v_perm_b32 v2, v1, v4, s0 +; GFX942-NEXT: v_pk_max_f16 v2, v2, 4.0 op_sel_hi:[1,0] +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v4, v4 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v1, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc +; GFX942-NEXT: v_perm_b32 v2, v0, v6, s0 +; GFX942-NEXT: v_pk_max_f16 v2, v2, 4.0 op_sel_hi:[1,0] +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v6, v6 +; GFX942-NEXT: v_perm_b32 v1, v1, v3, s0 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v4, v5, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc +; GFX942-NEXT: v_perm_b32 v0, v0, v4, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fmaximum3_v4f16__inlineimm2: ; GFX950: ; %bb.0: @@ -3538,19 +3538,19 @@ define <2 x float> @v_no_fmaximum3_f32__multi_use(float %a, float %b, float %c) ; GFX12-NEXT: v_maximum_f32 v1, v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_no_fmaximum3_f32__multi_use: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v3, v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX940-NEXT: v_max_f32_e32 v1, v0, v2 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_no_fmaximum3_f32__multi_use: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v3, v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX942-NEXT: v_max_f32_e32 v1, v0, v2 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_no_fmaximum3_f32__multi_use: ; GFX950: ; %bb.0: @@ -3573,22 +3573,22 @@ define amdgpu_ps <2 x i32> @s_no_fmaximum3_f32__multi_use(float inreg %a, float ; GFX12-NEXT: s_maximum_f32 s1, s0, s2 ; GFX12-NEXT: ; return to shader part epilog ; -; GFX940-LABEL: s_no_fmaximum3_f32__multi_use: -; GFX940: ; %bb.0: -; GFX940-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NEXT: v_max_f32_e32 v1, s0, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX940-NEXT: v_max_f32_e32 v1, s2, v0 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s2, v0 -; GFX940-NEXT: v_readfirstlane_b32 s0, v0 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_readfirstlane_b32 s1, v1 -; GFX940-NEXT: ; return to shader part epilog +; GFX942-LABEL: s_no_fmaximum3_f32__multi_use: +; GFX942: ; %bb.0: +; GFX942-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NEXT: v_max_f32_e32 v1, s0, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX942-NEXT: v_max_f32_e32 v1, s2, v0 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, s2, v0 +; GFX942-NEXT: v_readfirstlane_b32 s0, v0 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_readfirstlane_b32 s1, v1 +; GFX942-NEXT: ; return to shader part epilog ; ; GFX950-LABEL: s_no_fmaximum3_f32__multi_use: ; GFX950: ; %bb.0: @@ -3697,30 +3697,30 @@ define <4 x half> @v_no_fmaximum3_v2f16__multi_use(<2 x half> %a, <2 x half> %b, ; GFX12-NEXT: v_pk_maximum_f16 v1, v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_no_fmaximum3_v2f16__multi_use: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v3, v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, 0x7e00 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc -; GFX940-NEXT: v_perm_b32 v0, v1, v5, s0 -; GFX940-NEXT: v_pk_max_f16 v3, v0, v2 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v5, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc -; GFX940-NEXT: v_perm_b32 v1, v1, v5, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_no_fmaximum3_v2f16__multi_use: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v3, v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, 0x7e00 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX942-NEXT: v_perm_b32 v0, v1, v5, s0 +; GFX942-NEXT: v_pk_max_f16 v3, v0, v2 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v5, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX942-NEXT: v_perm_b32 v1, v1, v5, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_no_fmaximum3_v2f16__multi_use: ; GFX950: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/fminimum3.ll b/llvm/test/CodeGen/AMDGPU/fminimum3.ll index 234a07849a911..56e0b2c2f06ce 100644 --- a/llvm/test/CodeGen/AMDGPU/fminimum3.ll +++ b/llvm/test/CodeGen/AMDGPU/fminimum3.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX940 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX942 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s define float @v_fminimum3_f32(float %a, float %b, float %c) { @@ -14,19 +14,19 @@ define float @v_fminimum3_f32(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, v1, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_f32: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_min_f32_e32 v3, v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX940-NEXT: v_min_f32_e32 v1, v0, v2 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_f32: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_min_f32_e32 v3, v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX942-NEXT: v_min_f32_e32 v1, v0, v2 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_f32: ; GFX950: ; %bb.0: @@ -49,19 +49,19 @@ define float @v_fminimum3_f32_commute(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, v2, v0, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_f32_commute: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_min_f32_e32 v3, v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX940-NEXT: v_min_f32_e32 v1, v2, v0 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_f32_commute: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_min_f32_e32 v3, v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX942-NEXT: v_min_f32_e32 v1, v2, v0 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v2, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_f32_commute: ; GFX950: ; %bb.0: @@ -83,21 +83,21 @@ define amdgpu_ps i32 @s_fminimum3_f32(float inreg %a, float inreg %b, float inre ; GFX12-NEXT: s_wait_alu 0xf1ff ; GFX12-NEXT: ; return to shader part epilog ; -; GFX940-LABEL: s_fminimum3_f32: -; GFX940: ; %bb.0: -; GFX940-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NEXT: v_min_f32_e32 v1, s0, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX940-NEXT: v_min_f32_e32 v1, s2, v0 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s2, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_readfirstlane_b32 s0, v0 -; GFX940-NEXT: ; return to shader part epilog +; GFX942-LABEL: s_fminimum3_f32: +; GFX942: ; %bb.0: +; GFX942-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NEXT: v_min_f32_e32 v1, s0, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX942-NEXT: v_min_f32_e32 v1, s2, v0 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, s2, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_readfirstlane_b32 s0, v0 +; GFX942-NEXT: ; return to shader part epilog ; ; GFX950-LABEL: s_fminimum3_f32: ; GFX950: ; %bb.0: @@ -125,19 +125,19 @@ define float @v_fminimum3_f32_fabs0(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, |v0|, v1, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_f32_fabs0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_min_f32_e64 v3, |v0|, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX940-NEXT: v_min_f32_e32 v1, v0, v2 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_f32_fabs0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_min_f32_e64 v3, |v0|, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, |v0|, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX942-NEXT: v_min_f32_e32 v1, v0, v2 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_f32_fabs0: ; GFX950: ; %bb.0: @@ -161,19 +161,19 @@ define float @v_fminimum3_f32_fabs1(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, |v1|, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_f32_fabs1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_min_f32_e64 v3, v0, |v1| -; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v1| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX940-NEXT: v_min_f32_e32 v1, v0, v2 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_f32_fabs1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_min_f32_e64 v3, v0, |v1| +; GFX942-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v0, |v1| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX942-NEXT: v_min_f32_e32 v1, v0, v2 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_f32_fabs1: ; GFX950: ; %bb.0: @@ -197,19 +197,19 @@ define float @v_fminimum3_f32_fabs2(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, v1, |v2| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_f32_fabs2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_min_f32_e32 v3, v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX940-NEXT: v_min_f32_e64 v1, v0, |v2| -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_f32_fabs2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_min_f32_e32 v3, v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX942-NEXT: v_min_f32_e64 v1, v0, |v2| +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_f32_fabs2: ; GFX950: ; %bb.0: @@ -233,19 +233,19 @@ define float @v_fminimum3_f32_fabs_all(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, |v0|, |v1|, |v2| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_f32_fabs_all: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_min_f32_e64 v3, |v0|, |v1| -; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v1| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX940-NEXT: v_min_f32_e64 v1, v0, |v2| -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_f32_fabs_all: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_min_f32_e64 v3, |v0|, |v1| +; GFX942-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v1| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX942-NEXT: v_min_f32_e64 v1, v0, |v2| +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_f32_fabs_all: ; GFX950: ; %bb.0: @@ -271,19 +271,19 @@ define float @v_fminimum3_f32_fneg_all(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, -v0, -v1, -v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_f32_fneg_all: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_min_f32_e64 v3, -v0, -v1 -; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX940-NEXT: v_min_f32_e64 v1, v0, -v2 -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_f32_fneg_all: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_min_f32_e64 v3, -v0, -v1 +; GFX942-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX942-NEXT: v_min_f32_e64 v1, v0, -v2 +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_f32_fneg_all: ; GFX950: ; %bb.0: @@ -309,19 +309,19 @@ define float @v_fminimum3_f32_fneg_fabs_all(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, -|v0|, -|v1|, -|v2| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_f32_fneg_fabs_all: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_min_f32_e64 v3, -|v0|, -|v1| -; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -|v0|, -|v1| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX940-NEXT: v_min_f32_e64 v1, v0, -|v2| -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -|v2| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_f32_fneg_fabs_all: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_min_f32_e64 v3, -|v0|, -|v1| +; GFX942-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, -|v0|, -|v1| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX942-NEXT: v_min_f32_e64 v1, v0, -|v2| +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v0, -|v2| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_f32_fneg_fabs_all: ; GFX950: ; %bb.0: @@ -350,19 +350,19 @@ define float @v_fminimum3_f32_fneg0(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, -v0, v1, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_f32_fneg0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_min_f32_e64 v3, -v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX940-NEXT: v_min_f32_e32 v1, v0, v2 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_f32_fneg0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_min_f32_e64 v3, -v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, -v0, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX942-NEXT: v_min_f32_e32 v1, v0, v2 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_f32_fneg0: ; GFX950: ; %bb.0: @@ -386,19 +386,19 @@ define float @v_fminimum3_f32_fneg1(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, -v1, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_f32_fneg1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_min_f32_e64 v3, v0, -v1 -; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX940-NEXT: v_min_f32_e32 v1, v0, v2 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_f32_fneg1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_min_f32_e64 v3, v0, -v1 +; GFX942-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v0, -v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX942-NEXT: v_min_f32_e32 v1, v0, v2 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_f32_fneg1: ; GFX950: ; %bb.0: @@ -422,19 +422,19 @@ define float @v_fminimum3_f32_fneg2(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, v1, -v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_f32_fneg2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_min_f32_e32 v3, v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX940-NEXT: v_min_f32_e64 v1, v0, -v2 -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_f32_fneg2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_min_f32_e32 v3, v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX942-NEXT: v_min_f32_e64 v1, v0, -v2 +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_f32_fneg2: ; GFX950: ; %bb.0: @@ -458,19 +458,19 @@ define float @v_fminimum3_f32_const0(float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, 0x41000000, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_f32_const0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_min_f32_e32 v2, 0x41000000, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX940-NEXT: v_min_f32_e32 v2, v0, v1 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_f32_const0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_min_f32_e32 v2, 0x41000000, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX942-NEXT: v_min_f32_e32 v2, v0, v1 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_f32_const0: ; GFX950: ; %bb.0: @@ -494,19 +494,19 @@ define float @v_fminimum3_f32__const2(float %a, float %b) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, v1, 0x41000000 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_f32__const2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_min_f32_e32 v2, v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX940-NEXT: v_min_f32_e32 v1, 0x41000000, v0 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_f32__const2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_min_f32_e32 v2, v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX942-NEXT: v_min_f32_e32 v1, 0x41000000, v0 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_f32__const2: ; GFX950: ; %bb.0: @@ -530,19 +530,19 @@ define float @v_fminimum3_f32_inlineimm0(float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, 4.0, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_f32_inlineimm0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_min_f32_e32 v2, 4.0, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX940-NEXT: v_min_f32_e32 v2, v0, v1 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_f32_inlineimm0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_min_f32_e32 v2, 4.0, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX942-NEXT: v_min_f32_e32 v2, v0, v1 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_f32_inlineimm0: ; GFX950: ; %bb.0: @@ -565,19 +565,19 @@ define float @v_fminimum3_f32__inlineimm(float %a, float %b) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, v1, 4.0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_f32__inlineimm: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_min_f32_e32 v2, v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX940-NEXT: v_min_f32_e32 v1, 4.0, v0 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_f32__inlineimm: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_min_f32_e32 v2, v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX942-NEXT: v_min_f32_e32 v1, 4.0, v0 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_f32__inlineimm: ; GFX950: ; %bb.0: @@ -602,19 +602,19 @@ define float @v_fminimum3_f32_const1_const2(float %a) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, s0, 0x41800000 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_f32_const1_const2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_min_f32_e32 v1, 0x41000000, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX940-NEXT: v_min_f32_e32 v1, 0x41800000, v0 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_f32_const1_const2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_min_f32_e32 v1, 0x41000000, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX942-NEXT: v_min_f32_e32 v1, 0x41800000, v0 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_f32_const1_const2: ; GFX950: ; %bb.0: @@ -640,27 +640,27 @@ define <2 x float> @v_fminimum3_v2f32(<2 x float> %a, <2 x float> %b, <2 x float ; GFX12-NEXT: v_minimum3_f32 v1, v5, v1, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_v2f32: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_min_f32_e32 v6, v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 -; GFX940-NEXT: v_min_f32_e32 v3, v0, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc -; GFX940-NEXT: v_min_f32_e32 v2, v4, v0 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v4, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc -; GFX940-NEXT: v_min_f32_e32 v2, v5, v1 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v5, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_v2f32: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_min_f32_e32 v6, v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v7, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 +; GFX942-NEXT: v_min_f32_e32 v3, v0, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc +; GFX942-NEXT: v_min_f32_e32 v2, v4, v0 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v4, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX942-NEXT: v_min_f32_e32 v2, v5, v1 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v5, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_v2f32: ; GFX950: ; %bb.0: @@ -685,27 +685,27 @@ define <2 x float> @v_fminimum3_v2f32_commute(<2 x float> %a, <2 x float> %b, <2 ; GFX12-NEXT: v_minimum3_f32 v1, v1, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_v2f32_commute: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_min_f32_e32 v6, v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 -; GFX940-NEXT: v_min_f32_e32 v3, v0, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc -; GFX940-NEXT: v_min_f32_e32 v2, v0, v4 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v4 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc -; GFX940-NEXT: v_min_f32_e32 v2, v1, v5 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_v2f32_commute: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_min_f32_e32 v6, v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v7, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 +; GFX942-NEXT: v_min_f32_e32 v3, v0, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc +; GFX942-NEXT: v_min_f32_e32 v2, v0, v4 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v4 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX942-NEXT: v_min_f32_e32 v2, v1, v5 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_v2f32_commute: ; GFX950: ; %bb.0: @@ -730,27 +730,27 @@ define <2 x float> @v_fminimum3_v2f32__fabs_all(<2 x float> %a, <2 x float> %b, ; GFX12-NEXT: v_minimum3_f32 v1, |v1|, |v3|, |v5| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_v2f32__fabs_all: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_min_f32_e64 v6, |v1|, |v3| -; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v3| -; GFX940-NEXT: v_min_f32_e64 v3, |v0|, |v2| -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v2| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc -; GFX940-NEXT: v_min_f32_e64 v2, v0, |v4| -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v4| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc -; GFX940-NEXT: v_min_f32_e64 v2, v1, |v5| -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v1, |v5| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_v2f32__fabs_all: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_min_f32_e64 v6, |v1|, |v3| +; GFX942-NEXT: v_mov_b32_e32 v7, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v3| +; GFX942-NEXT: v_min_f32_e64 v3, |v0|, |v2| +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v2| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc +; GFX942-NEXT: v_min_f32_e64 v2, v0, |v4| +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v0, |v4| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX942-NEXT: v_min_f32_e64 v2, v1, |v5| +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v1, |v5| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_v2f32__fabs_all: ; GFX950: ; %bb.0: @@ -778,27 +778,27 @@ define <2 x float> @v_fminimum3_v2f32__fneg_all(<2 x float> %a, <2 x float> %b, ; GFX12-NEXT: v_minimum3_f32 v1, -v1, -v3, -v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_v2f32__fneg_all: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_min_f32_e64 v6, -v1, -v3 -; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v3 -; GFX940-NEXT: v_min_f32_e64 v3, -v0, -v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc -; GFX940-NEXT: v_min_f32_e64 v2, v0, -v4 -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v4 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc -; GFX940-NEXT: v_min_f32_e64 v2, v1, -v5 -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v1, -v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_v2f32__fneg_all: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_min_f32_e64 v6, -v1, -v3 +; GFX942-NEXT: v_mov_b32_e32 v7, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v3 +; GFX942-NEXT: v_min_f32_e64 v3, -v0, -v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc +; GFX942-NEXT: v_min_f32_e64 v2, v0, -v4 +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v0, -v4 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX942-NEXT: v_min_f32_e64 v2, v1, -v5 +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v1, -v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_v2f32__fneg_all: ; GFX950: ; %bb.0: @@ -826,27 +826,27 @@ define <2 x float> @v_fminimum3_v2f32__inlineimm1(<2 x float> %a, <2 x float> %c ; GFX12-NEXT: v_minimum3_f32 v1, v1, 2.0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_v2f32__inlineimm1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_min_f32_e32 v4, 2.0, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc -; GFX940-NEXT: v_min_f32_e32 v4, 2.0, v0 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc -; GFX940-NEXT: v_min_f32_e32 v4, v0, v2 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX940-NEXT: v_min_f32_e32 v2, v1, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_v2f32__inlineimm1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_min_f32_e32 v4, 2.0, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc +; GFX942-NEXT: v_min_f32_e32 v4, 2.0, v0 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc +; GFX942-NEXT: v_min_f32_e32 v4, v0, v2 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX942-NEXT: v_min_f32_e32 v2, v1, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_v2f32__inlineimm1: ; GFX950: ; %bb.0: @@ -871,27 +871,27 @@ define <2 x float> @v_fminimum3_v2f32__inlineimm2(<2 x float> %a, <2 x float> %b ; GFX12-NEXT: v_minimum3_f32 v1, v1, v3, 4.0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_v2f32__inlineimm2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_min_f32_e32 v4, v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 -; GFX940-NEXT: v_min_f32_e32 v3, v0, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc -; GFX940-NEXT: v_min_f32_e32 v2, 4.0, v0 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc -; GFX940-NEXT: v_min_f32_e32 v2, 4.0, v1 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_v2f32__inlineimm2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_min_f32_e32 v4, v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 +; GFX942-NEXT: v_min_f32_e32 v3, v0, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc +; GFX942-NEXT: v_min_f32_e32 v2, 4.0, v0 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc +; GFX942-NEXT: v_min_f32_e32 v2, 4.0, v1 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_v2f32__inlineimm2: ; GFX950: ; %bb.0: @@ -917,35 +917,35 @@ define <3 x float> @v_fminimum3_v3f32(<3 x float> %a, <3 x float> %b, <3 x float ; GFX12-NEXT: v_minimum3_f32 v2, v8, v2, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_v3f32: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_min_f32_e32 v9, v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v10, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 -; GFX940-NEXT: v_min_f32_e32 v5, v1, v4 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 -; GFX940-NEXT: v_min_f32_e32 v4, v0, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc -; GFX940-NEXT: v_min_f32_e32 v3, v6, v0 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v6, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc -; GFX940-NEXT: v_min_f32_e32 v3, v7, v1 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v7, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc -; GFX940-NEXT: v_min_f32_e32 v3, v8, v2 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v8, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_v3f32: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_min_f32_e32 v9, v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v10, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 +; GFX942-NEXT: v_min_f32_e32 v5, v1, v4 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 +; GFX942-NEXT: v_min_f32_e32 v4, v0, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc +; GFX942-NEXT: v_min_f32_e32 v3, v6, v0 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v6, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc +; GFX942-NEXT: v_min_f32_e32 v3, v7, v1 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v7, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc +; GFX942-NEXT: v_min_f32_e32 v3, v8, v2 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v8, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_v3f32: ; GFX950: ; %bb.0: @@ -972,35 +972,35 @@ define <3 x float> @v_fminimum3_v3f32_commute(<3 x float> %a, <3 x float> %b, <3 ; GFX12-NEXT: v_minimum3_f32 v2, v2, v5, v8 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_v3f32_commute: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_min_f32_e32 v9, v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v10, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 -; GFX940-NEXT: v_min_f32_e32 v5, v1, v4 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 -; GFX940-NEXT: v_min_f32_e32 v4, v0, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc -; GFX940-NEXT: v_min_f32_e32 v3, v0, v6 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v6 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc -; GFX940-NEXT: v_min_f32_e32 v3, v1, v7 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v7 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc -; GFX940-NEXT: v_min_f32_e32 v3, v2, v8 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v8 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_v3f32_commute: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_min_f32_e32 v9, v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v10, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 +; GFX942-NEXT: v_min_f32_e32 v5, v1, v4 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 +; GFX942-NEXT: v_min_f32_e32 v4, v0, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc +; GFX942-NEXT: v_min_f32_e32 v3, v0, v6 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v6 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc +; GFX942-NEXT: v_min_f32_e32 v3, v1, v7 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v7 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc +; GFX942-NEXT: v_min_f32_e32 v3, v2, v8 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v2, v8 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_v3f32_commute: ; GFX950: ; %bb.0: @@ -1027,35 +1027,35 @@ define <3 x float> @v_fminimum3_v3f32__fabs_all(<3 x float> %a, <3 x float> %b, ; GFX12-NEXT: v_minimum3_f32 v2, |v2|, |v5|, |v8| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_v3f32__fabs_all: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_min_f32_e64 v9, |v2|, |v5| -; GFX940-NEXT: v_mov_b32_e32 v10, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v2|, |v5| -; GFX940-NEXT: v_min_f32_e64 v5, |v1|, |v4| -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v4| -; GFX940-NEXT: v_min_f32_e64 v4, |v0|, |v3| -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v3| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc -; GFX940-NEXT: v_min_f32_e64 v3, v0, |v6| -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v6| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc -; GFX940-NEXT: v_min_f32_e64 v3, v1, |v7| -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v1, |v7| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc -; GFX940-NEXT: v_min_f32_e64 v3, v2, |v8| -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v2, |v8| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_v3f32__fabs_all: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_min_f32_e64 v9, |v2|, |v5| +; GFX942-NEXT: v_mov_b32_e32 v10, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, |v2|, |v5| +; GFX942-NEXT: v_min_f32_e64 v5, |v1|, |v4| +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v4| +; GFX942-NEXT: v_min_f32_e64 v4, |v0|, |v3| +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v3| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc +; GFX942-NEXT: v_min_f32_e64 v3, v0, |v6| +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v0, |v6| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc +; GFX942-NEXT: v_min_f32_e64 v3, v1, |v7| +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v1, |v7| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc +; GFX942-NEXT: v_min_f32_e64 v3, v2, |v8| +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v2, |v8| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_v3f32__fabs_all: ; GFX950: ; %bb.0: @@ -1085,35 +1085,35 @@ define <3 x float> @v_fminimum3_v3f32__fneg_all(<3 x float> %a, <3 x float> %b, ; GFX12-NEXT: v_minimum3_f32 v2, -v2, -v5, -v8 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_v3f32__fneg_all: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_min_f32_e64 v9, -v2, -v5 -; GFX940-NEXT: v_mov_b32_e32 v10, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v2, -v5 -; GFX940-NEXT: v_min_f32_e64 v5, -v1, -v4 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v4 -; GFX940-NEXT: v_min_f32_e64 v4, -v0, -v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc -; GFX940-NEXT: v_min_f32_e64 v3, v0, -v6 -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v6 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc -; GFX940-NEXT: v_min_f32_e64 v3, v1, -v7 -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v1, -v7 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc -; GFX940-NEXT: v_min_f32_e64 v3, v2, -v8 -; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v2, -v8 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_v3f32__fneg_all: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_min_f32_e64 v9, -v2, -v5 +; GFX942-NEXT: v_mov_b32_e32 v10, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, -v2, -v5 +; GFX942-NEXT: v_min_f32_e64 v5, -v1, -v4 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v4 +; GFX942-NEXT: v_min_f32_e64 v4, -v0, -v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc +; GFX942-NEXT: v_min_f32_e64 v3, v0, -v6 +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v0, -v6 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc +; GFX942-NEXT: v_min_f32_e64 v3, v1, -v7 +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v1, -v7 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc +; GFX942-NEXT: v_min_f32_e64 v3, v2, -v8 +; GFX942-NEXT: v_cmp_o_f32_e64 vcc, v2, -v8 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_v3f32__fneg_all: ; GFX950: ; %bb.0: @@ -1143,35 +1143,35 @@ define <3 x float> @v_fminimum3_v3f32__inlineimm1(<3 x float> %a, <3 x float> %c ; GFX12-NEXT: v_minimum3_f32 v2, v2, 2.0, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_v3f32__inlineimm1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_min_f32_e32 v6, 2.0, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc -; GFX940-NEXT: v_min_f32_e32 v6, 2.0, v1 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc -; GFX940-NEXT: v_min_f32_e32 v6, 2.0, v0 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc -; GFX940-NEXT: v_min_f32_e32 v6, v0, v3 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 -; GFX940-NEXT: v_min_f32_e32 v3, v1, v4 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc -; GFX940-NEXT: v_min_f32_e32 v3, v2, v5 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_v3f32__inlineimm1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_min_f32_e32 v6, 2.0, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc +; GFX942-NEXT: v_min_f32_e32 v6, 2.0, v1 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc +; GFX942-NEXT: v_min_f32_e32 v6, 2.0, v0 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc +; GFX942-NEXT: v_min_f32_e32 v6, v0, v3 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 +; GFX942-NEXT: v_min_f32_e32 v3, v1, v4 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc +; GFX942-NEXT: v_min_f32_e32 v3, v2, v5 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_v3f32__inlineimm1: ; GFX950: ; %bb.0: @@ -1198,35 +1198,35 @@ define <3 x float> @v_fminimum3_v3f32__inlineimm2(<3 x float> %a, <3 x float> %b ; GFX12-NEXT: v_minimum3_f32 v2, v2, v5, 4.0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_v3f32__inlineimm2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_min_f32_e32 v6, v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 -; GFX940-NEXT: v_min_f32_e32 v5, v1, v4 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 -; GFX940-NEXT: v_min_f32_e32 v4, v0, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v4, vcc -; GFX940-NEXT: v_min_f32_e32 v3, 4.0, v0 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc -; GFX940-NEXT: v_min_f32_e32 v3, 4.0, v1 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc -; GFX940-NEXT: v_min_f32_e32 v3, 4.0, v2 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_v3f32__inlineimm2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_min_f32_e32 v6, v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v7, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 +; GFX942-NEXT: v_min_f32_e32 v5, v1, v4 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 +; GFX942-NEXT: v_min_f32_e32 v4, v0, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v4, vcc +; GFX942-NEXT: v_min_f32_e32 v3, 4.0, v0 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc +; GFX942-NEXT: v_min_f32_e32 v3, 4.0, v1 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc +; GFX942-NEXT: v_min_f32_e32 v3, 4.0, v2 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_v3f32__inlineimm2: ; GFX950: ; %bb.0: @@ -1774,30 +1774,30 @@ define <2 x half> @v_fminimum3_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c ; GFX12-NEXT: v_pk_minimum_f16 v0, v2, v0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_v2f16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_min_f16 v3, v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, 0x7e00 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX940-NEXT: v_perm_b32 v1, v0, v5, s0 -; GFX940-NEXT: v_pk_min_f16 v1, v2, v1 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v2, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v2, v0 src0_sel:WORD_1 src1_sel:DWORD -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_v2f16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_min_f16 v3, v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, 0x7e00 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX942-NEXT: v_perm_b32 v1, v0, v5, s0 +; GFX942-NEXT: v_pk_min_f16 v1, v2, v1 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v2, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v2, v0 src0_sel:WORD_1 src1_sel:DWORD +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_v2f16: ; GFX950: ; %bb.0: @@ -1824,30 +1824,30 @@ define <2 x half> @v_fminimum3_v2f16_commute(<2 x half> %a, <2 x half> %b, <2 x ; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_v2f16_commute: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_min_f16 v3, v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, 0x7e00 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX940-NEXT: v_perm_b32 v1, v0, v5, s0 -; GFX940-NEXT: v_pk_min_f16 v1, v1, v2 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v5, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_v2f16_commute: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_min_f16 v3, v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, 0x7e00 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX942-NEXT: v_perm_b32 v1, v0, v5, s0 +; GFX942-NEXT: v_pk_min_f16 v1, v1, v2 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v5, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_v2f16_commute: ; GFX950: ; %bb.0: @@ -1877,32 +1877,32 @@ define <2 x half> @v_fminimum3_v2f16__fabs_all(<2 x half> %a, <2 x half> %b, <2 ; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_v2f16__fabs_all: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v0 -; GFX940-NEXT: v_and_b32_e32 v4, 0x7fff7fff, v1 -; GFX940-NEXT: v_pk_min_f16 v3, v3, v4 -; GFX940-NEXT: v_mov_b32_e32 v6, 0x7e00 -; GFX940-NEXT: v_lshrrev_b32_e32 v4, 16, v3 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, |v0|, |v1| src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: v_and_b32_e32 v5, 0x7fff7fff, v2 -; GFX940-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, |v0|, |v1| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v6, v3, vcc -; GFX940-NEXT: v_perm_b32 v1, v4, v0, s0 -; GFX940-NEXT: v_pk_min_f16 v1, v1, v5 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v4, |v2| src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v1 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, v0, |v2| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v6, v1, vcc -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_v2f16__fabs_all: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v0 +; GFX942-NEXT: v_and_b32_e32 v4, 0x7fff7fff, v1 +; GFX942-NEXT: v_pk_min_f16 v3, v3, v4 +; GFX942-NEXT: v_mov_b32_e32 v6, 0x7e00 +; GFX942-NEXT: v_lshrrev_b32_e32 v4, 16, v3 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, |v0|, |v1| src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: v_and_b32_e32 v5, 0x7fff7fff, v2 +; GFX942-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, |v0|, |v1| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v6, v3, vcc +; GFX942-NEXT: v_perm_b32 v1, v4, v0, s0 +; GFX942-NEXT: v_pk_min_f16 v1, v1, v5 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v4, |v2| src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, v0, |v2| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v6, v1, vcc +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_v2f16__fabs_all: ; GFX950: ; %bb.0: @@ -1935,30 +1935,30 @@ define <2 x half> @v_fminimum3_v2f16__fneg_all(<2 x half> %a, <2 x half> %b, <2 ; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_v2f16__fneg_all: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_min_f16 v3, v0, v1 neg_lo:[1,1] neg_hi:[1,1] -; GFX940-NEXT: v_mov_b32_e32 v4, 0x7e00 -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, -v0, -v1 -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, -v0, -v1 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX940-NEXT: v_perm_b32 v1, v0, v5, s0 -; GFX940-NEXT: v_pk_min_f16 v1, v1, v2 neg_lo:[0,1] neg_hi:[0,1] -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, v5, -v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, -v2 src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_v2f16__fneg_all: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_min_f16 v3, v0, v1 neg_lo:[1,1] neg_hi:[1,1] +; GFX942-NEXT: v_mov_b32_e32 v4, 0x7e00 +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, -v0, -v1 +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, -v0, -v1 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX942-NEXT: v_perm_b32 v1, v0, v5, s0 +; GFX942-NEXT: v_pk_min_f16 v1, v1, v2 neg_lo:[0,1] neg_hi:[0,1] +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, v5, -v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, -v2 src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_v2f16__fneg_all: ; GFX950: ; %bb.0: @@ -1988,30 +1988,30 @@ define <2 x half> @v_fminimum3_v2f16__inlineimm1(<2 x half> %a, <2 x half> %c) { ; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_v2f16__inlineimm1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_min_f16 v2, v0, 2.0 op_sel_hi:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, 0x7e00 -; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc -; GFX940-NEXT: v_perm_b32 v2, v3, v0, s0 -; GFX940-NEXT: v_pk_min_f16 v2, v2, v1 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v3, v1 src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_lshrrev_b32_e32 v5, 16, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_v2f16__inlineimm1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_min_f16 v2, v0, 2.0 op_sel_hi:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, 0x7e00 +; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc +; GFX942-NEXT: v_perm_b32 v2, v3, v0, s0 +; GFX942-NEXT: v_pk_min_f16 v2, v2, v1 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v3, v1 src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_lshrrev_b32_e32 v5, 16, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_v2f16__inlineimm1: ; GFX950: ; %bb.0: @@ -2038,30 +2038,30 @@ define <2 x half> @v_fminimum3_v2f16__inlineimm2(<2 x half> %a, <2 x half> %b) { ; GFX12-NEXT: v_pk_minimum_f16 v0, v0, 4.0 op_sel_hi:[1,0] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_v2f16__inlineimm2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_min_f16 v2, v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, 0x7e00 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX940-NEXT: v_perm_b32 v1, v0, v4, s0 -; GFX940-NEXT: v_pk_min_f16 v1, v1, 4.0 op_sel_hi:[1,0] -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v4, v4 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc -; GFX940-NEXT: v_perm_b32 v0, v0, v2, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_v2f16__inlineimm2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_min_f16 v2, v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, 0x7e00 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v4, v3, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX942-NEXT: v_perm_b32 v1, v0, v4, s0 +; GFX942-NEXT: v_pk_min_f16 v1, v1, 4.0 op_sel_hi:[1,0] +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v4, v4 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v3, v1, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc +; GFX942-NEXT: v_perm_b32 v0, v0, v2, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_v2f16__inlineimm2: ; GFX950: ; %bb.0: @@ -2090,42 +2090,42 @@ define <3 x half> @v_fminimum3_v3f16(<3 x half> %a, <3 x half> %b, <3 x half> %c ; GFX12-NEXT: v_pk_minimum_f16 v1, v5, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_v3f16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_min_f16 v6, v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, 0x7e00 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v6, 16, v6 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: v_pk_min_f16 v2, v1, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX940-NEXT: v_perm_b32 v1, v1, v6, s0 -; GFX940-NEXT: v_pk_min_f16 v1, v5, v1 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v5, v6 -; GFX940-NEXT: v_perm_b32 v2, v0, v8, s0 -; GFX940-NEXT: v_pk_min_f16 v2, v4, v2 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v4, v8 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_v3f16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_min_f16 v6, v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, 0x7e00 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v6, 16, v6 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: v_pk_min_f16 v2, v1, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX942-NEXT: v_perm_b32 v1, v1, v6, s0 +; GFX942-NEXT: v_pk_min_f16 v1, v5, v1 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v5, v6 +; GFX942-NEXT: v_perm_b32 v2, v0, v8, s0 +; GFX942-NEXT: v_pk_min_f16 v2, v4, v2 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v4, v8 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_v3f16: ; GFX950: ; %bb.0: @@ -2155,42 +2155,42 @@ define <3 x half> @v_fminimum3_v3f16_commute(<3 x half> %a, <3 x half> %b, <3 x ; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_v3f16_commute: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_min_f16 v6, v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, 0x7e00 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v6, 16, v6 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: v_pk_min_f16 v2, v1, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX940-NEXT: v_perm_b32 v1, v1, v6, s0 -; GFX940-NEXT: v_pk_min_f16 v1, v1, v5 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v6, v5 -; GFX940-NEXT: v_perm_b32 v2, v0, v8, s0 -; GFX940-NEXT: v_pk_min_f16 v2, v2, v4 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v8, v4 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_v3f16_commute: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_min_f16 v6, v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, 0x7e00 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v6, 16, v6 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: v_pk_min_f16 v2, v1, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX942-NEXT: v_perm_b32 v1, v1, v6, s0 +; GFX942-NEXT: v_pk_min_f16 v1, v1, v5 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v6, v5 +; GFX942-NEXT: v_perm_b32 v2, v0, v8, s0 +; GFX942-NEXT: v_pk_min_f16 v2, v2, v4 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v8, v4 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_v3f16_commute: ; GFX950: ; %bb.0: @@ -2227,46 +2227,46 @@ define <3 x half> @v_fminimum3_v3f16__fabs_all(<3 x half> %a, <3 x half> %b, <3 ; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_v3f16__fabs_all: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_and_b32_e32 v7, 0x7fff7fff, v1 -; GFX940-NEXT: v_and_b32_e32 v9, 0x7fff7fff, v3 -; GFX940-NEXT: v_and_b32_e32 v6, 0x7fff7fff, v0 -; GFX940-NEXT: v_and_b32_e32 v8, 0x7fff7fff, v2 -; GFX940-NEXT: v_pk_min_f16 v7, v7, v9 -; GFX940-NEXT: v_mov_b32_e32 v12, 0x7e00 -; GFX940-NEXT: v_lshrrev_b32_e32 v9, 16, v7 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: v_pk_min_f16 v6, v6, v8 -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: v_cndmask_b32_e32 v9, v12, v9, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v8, 16, v6 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: v_and_b32_e32 v11, 0x7fff7fff, v4 -; GFX940-NEXT: v_and_b32_e32 v10, 0x7fff7fff, v5 -; GFX940-NEXT: v_cndmask_b32_e32 v8, v12, v8, vcc -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, |v1|, |v3| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v12, v7, vcc -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, |v0|, |v2| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v12, v6, vcc -; GFX940-NEXT: v_perm_b32 v2, v8, v0, s0 -; GFX940-NEXT: v_pk_min_f16 v2, v2, v11 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v8, |v4| src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX940-NEXT: v_perm_b32 v6, v9, v1, s0 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc -; GFX940-NEXT: v_pk_min_f16 v6, v6, v10 -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, v1, |v5| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v12, v6, vcc -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, v0, |v4| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v12, v2, vcc -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_v3f16__fabs_all: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_and_b32_e32 v7, 0x7fff7fff, v1 +; GFX942-NEXT: v_and_b32_e32 v9, 0x7fff7fff, v3 +; GFX942-NEXT: v_and_b32_e32 v6, 0x7fff7fff, v0 +; GFX942-NEXT: v_and_b32_e32 v8, 0x7fff7fff, v2 +; GFX942-NEXT: v_pk_min_f16 v7, v7, v9 +; GFX942-NEXT: v_mov_b32_e32 v12, 0x7e00 +; GFX942-NEXT: v_lshrrev_b32_e32 v9, 16, v7 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: v_pk_min_f16 v6, v6, v8 +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: v_cndmask_b32_e32 v9, v12, v9, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v8, 16, v6 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: v_and_b32_e32 v11, 0x7fff7fff, v4 +; GFX942-NEXT: v_and_b32_e32 v10, 0x7fff7fff, v5 +; GFX942-NEXT: v_cndmask_b32_e32 v8, v12, v8, vcc +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, |v1|, |v3| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v12, v7, vcc +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, |v0|, |v2| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v12, v6, vcc +; GFX942-NEXT: v_perm_b32 v2, v8, v0, s0 +; GFX942-NEXT: v_pk_min_f16 v2, v2, v11 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v8, |v4| src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX942-NEXT: v_perm_b32 v6, v9, v1, s0 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc +; GFX942-NEXT: v_pk_min_f16 v6, v6, v10 +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, v1, |v5| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v12, v6, vcc +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, v0, |v4| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v12, v2, vcc +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_v3f16__fabs_all: ; GFX950: ; %bb.0: @@ -2305,42 +2305,42 @@ define <3 x half> @v_fminimum3_v3f16__fneg_all(<3 x half> %a, <3 x half> %b, <3 ; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_v3f16__fneg_all: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_min_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1] -; GFX940-NEXT: v_mov_b32_e32 v7, 0x7e00 -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, -v0, -v2 -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v6, 16, v6 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: v_pk_min_f16 v2, v1, v3 neg_lo:[1,1] neg_hi:[1,1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, -v1, -v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX940-NEXT: v_perm_b32 v1, v1, v6, s0 -; GFX940-NEXT: v_pk_min_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1] -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, v6, -v5 -; GFX940-NEXT: v_perm_b32 v2, v0, v8, s0 -; GFX940-NEXT: v_pk_min_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1] -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, v8, -v4 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_v3f16__fneg_all: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_min_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1] +; GFX942-NEXT: v_mov_b32_e32 v7, 0x7e00 +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, -v0, -v2 +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v6, 16, v6 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: v_pk_min_f16 v2, v1, v3 neg_lo:[1,1] neg_hi:[1,1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, -v1, -v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX942-NEXT: v_perm_b32 v1, v1, v6, s0 +; GFX942-NEXT: v_pk_min_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1] +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, v6, -v5 +; GFX942-NEXT: v_perm_b32 v2, v0, v8, s0 +; GFX942-NEXT: v_pk_min_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1] +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, v8, -v4 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_v3f16__fneg_all: ; GFX950: ; %bb.0: @@ -2373,39 +2373,39 @@ define <3 x half> @v_fminimum3_v3f16__inlineimm1(<3 x half> %a, <3 x half> %c) { ; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_v3f16__inlineimm1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_min_f16 v4, v0, 2.0 op_sel_hi:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v6, 0x7e00 -; GFX940-NEXT: v_lshrrev_b32_e32 v5, 16, v4 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: v_pk_min_f16 v7, v1, 2.0 -; GFX940-NEXT: s_mov_b32 s1, 0x5040100 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v1 -; GFX940-NEXT: s_movk_i32 s0, 0x7e00 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v6, v7, vcc -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc -; GFX940-NEXT: v_perm_b32 v4, v5, v0, s1 -; GFX940-NEXT: v_pk_min_f16 v4, v4, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v5, v2 src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_lshrrev_b32_e32 v7, 16, v4 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc -; GFX940-NEXT: v_pack_b32_f16 v7, v1, s0 -; GFX940-NEXT: v_pk_min_f16 v7, v7, v3 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v6, v7, vcc -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc -; GFX940-NEXT: v_perm_b32 v0, v5, v0, s1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_v3f16__inlineimm1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_min_f16 v4, v0, 2.0 op_sel_hi:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v6, 0x7e00 +; GFX942-NEXT: v_lshrrev_b32_e32 v5, 16, v4 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: v_pk_min_f16 v7, v1, 2.0 +; GFX942-NEXT: s_mov_b32 s1, 0x5040100 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v1, v1 +; GFX942-NEXT: s_movk_i32 s0, 0x7e00 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v6, v7, vcc +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc +; GFX942-NEXT: v_perm_b32 v4, v5, v0, s1 +; GFX942-NEXT: v_pk_min_f16 v4, v4, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v5, v2 src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_lshrrev_b32_e32 v7, 16, v4 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc +; GFX942-NEXT: v_pack_b32_f16 v7, v1, s0 +; GFX942-NEXT: v_pk_min_f16 v7, v7, v3 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v6, v7, vcc +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc +; GFX942-NEXT: v_perm_b32 v0, v5, v0, s1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_v3f16__inlineimm1: ; GFX950: ; %bb.0: @@ -2435,42 +2435,42 @@ define <3 x half> @v_fminimum3_v3f16__inlineimm2(<3 x half> %a, <3 x half> %b) { ; GFX12-NEXT: v_pk_minimum_f16 v1, v1, 4.0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_v3f16__inlineimm2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_min_f16 v4, v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, 0x7e00 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v4, 16, v4 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: v_pk_min_f16 v2, v1, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc -; GFX940-NEXT: v_perm_b32 v1, v1, v4, s0 -; GFX940-NEXT: v_pk_min_f16 v1, v1, 4.0 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v4, v4 -; GFX940-NEXT: v_perm_b32 v2, v0, v6, s0 -; GFX940-NEXT: v_pk_min_f16 v2, v2, 4.0 op_sel_hi:[1,0] -; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v6, v6 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_v3f16__inlineimm2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_min_f16 v4, v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, 0x7e00 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v4, 16, v4 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: v_pk_min_f16 v2, v1, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v4, v5, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc +; GFX942-NEXT: v_perm_b32 v1, v1, v4, s0 +; GFX942-NEXT: v_pk_min_f16 v1, v1, 4.0 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v4, v4 +; GFX942-NEXT: v_perm_b32 v2, v0, v6, s0 +; GFX942-NEXT: v_pk_min_f16 v2, v2, 4.0 op_sel_hi:[1,0] +; GFX942-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v6, v6 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_v3f16__inlineimm2: ; GFX950: ; %bb.0: @@ -2500,48 +2500,48 @@ define <4 x half> @v_fminimum3_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c ; GFX12-NEXT: v_pk_minimum_f16 v1, v5, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_v4f16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_min_f16 v6, v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, 0x7e00 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v6, 16, v6 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: v_pk_min_f16 v2, v1, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX940-NEXT: v_perm_b32 v2, v1, v6, s0 -; GFX940-NEXT: v_pk_min_f16 v2, v5, v2 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v5, v6 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v5, v1 src0_sel:WORD_1 src1_sel:DWORD -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX940-NEXT: v_perm_b32 v2, v0, v8, s0 -; GFX940-NEXT: v_pk_min_f16 v2, v4, v2 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v4, v8 -; GFX940-NEXT: v_perm_b32 v1, v1, v3, s0 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc -; GFX940-NEXT: v_perm_b32 v0, v0, v5, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_v4f16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_min_f16 v6, v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, 0x7e00 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v6, 16, v6 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: v_pk_min_f16 v2, v1, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX942-NEXT: v_perm_b32 v2, v1, v6, s0 +; GFX942-NEXT: v_pk_min_f16 v2, v5, v2 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v5, v6 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v5, v1 src0_sel:WORD_1 src1_sel:DWORD +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX942-NEXT: v_perm_b32 v2, v0, v8, s0 +; GFX942-NEXT: v_pk_min_f16 v2, v4, v2 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v4, v8 +; GFX942-NEXT: v_perm_b32 v1, v1, v3, s0 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v7, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v4, v0 src0_sel:WORD_1 src1_sel:DWORD +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX942-NEXT: v_perm_b32 v0, v0, v5, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_v4f16: ; GFX950: ; %bb.0: @@ -2571,48 +2571,48 @@ define <4 x half> @v_fminimum3_v4f16_commute(<4 x half> %a, <4 x half> %b, <4 x ; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_v4f16_commute: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_min_f16 v6, v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, 0x7e00 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v6, 16, v6 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: v_pk_min_f16 v2, v1, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX940-NEXT: v_perm_b32 v2, v1, v6, s0 -; GFX940-NEXT: v_pk_min_f16 v2, v2, v5 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v6, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX940-NEXT: v_perm_b32 v2, v0, v8, s0 -; GFX940-NEXT: v_pk_min_f16 v2, v2, v4 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v8, v4 -; GFX940-NEXT: v_perm_b32 v1, v1, v3, s0 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc -; GFX940-NEXT: v_perm_b32 v0, v0, v5, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_v4f16_commute: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_min_f16 v6, v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, 0x7e00 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v6, 16, v6 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: v_pk_min_f16 v2, v1, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX942-NEXT: v_perm_b32 v2, v1, v6, s0 +; GFX942-NEXT: v_pk_min_f16 v2, v2, v5 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v6, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v1, v5 src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX942-NEXT: v_perm_b32 v2, v0, v8, s0 +; GFX942-NEXT: v_pk_min_f16 v2, v2, v4 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v8, v4 +; GFX942-NEXT: v_perm_b32 v1, v1, v3, s0 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v7, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v4 src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX942-NEXT: v_perm_b32 v0, v0, v5, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_v4f16_commute: ; GFX950: ; %bb.0: @@ -2649,52 +2649,52 @@ define <4 x half> @v_fminimum3_v4f16__fabs_all(<4 x half> %a, <4 x half> %b, <4 ; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_v4f16__fabs_all: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_and_b32_e32 v7, 0x7fff7fff, v0 -; GFX940-NEXT: v_and_b32_e32 v9, 0x7fff7fff, v2 -; GFX940-NEXT: v_and_b32_e32 v6, 0x7fff7fff, v1 -; GFX940-NEXT: v_and_b32_e32 v8, 0x7fff7fff, v3 -; GFX940-NEXT: v_pk_min_f16 v7, v7, v9 -; GFX940-NEXT: v_mov_b32_e32 v12, 0x7e00 -; GFX940-NEXT: v_lshrrev_b32_e32 v9, 16, v7 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: v_pk_min_f16 v6, v6, v8 -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: v_cndmask_b32_e32 v9, v12, v9, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v8, 16, v6 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: v_and_b32_e32 v11, 0x7fff7fff, v5 -; GFX940-NEXT: v_and_b32_e32 v10, 0x7fff7fff, v4 -; GFX940-NEXT: v_cndmask_b32_e32 v8, v12, v8, vcc -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, |v0|, |v2| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v12, v7, vcc -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, |v1|, |v3| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v12, v6, vcc -; GFX940-NEXT: v_perm_b32 v2, v8, v1, s0 -; GFX940-NEXT: v_pk_min_f16 v2, v2, v11 -; GFX940-NEXT: v_perm_b32 v6, v9, v0, s0 -; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v8, |v5| src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_pk_min_f16 v6, v6, v10 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v7, 16, v6 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v9, |v4| src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v7, v12, v7, vcc -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, v1, |v5| -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v12, v2, vcc -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, v0, |v4| -; GFX940-NEXT: v_perm_b32 v1, v3, v1, s0 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v12, v6, vcc -; GFX940-NEXT: v_perm_b32 v0, v7, v0, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_v4f16__fabs_all: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_and_b32_e32 v7, 0x7fff7fff, v0 +; GFX942-NEXT: v_and_b32_e32 v9, 0x7fff7fff, v2 +; GFX942-NEXT: v_and_b32_e32 v6, 0x7fff7fff, v1 +; GFX942-NEXT: v_and_b32_e32 v8, 0x7fff7fff, v3 +; GFX942-NEXT: v_pk_min_f16 v7, v7, v9 +; GFX942-NEXT: v_mov_b32_e32 v12, 0x7e00 +; GFX942-NEXT: v_lshrrev_b32_e32 v9, 16, v7 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, |v0|, |v2| src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: v_pk_min_f16 v6, v6, v8 +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: v_cndmask_b32_e32 v9, v12, v9, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v8, 16, v6 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, |v1|, |v3| src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: v_and_b32_e32 v11, 0x7fff7fff, v5 +; GFX942-NEXT: v_and_b32_e32 v10, 0x7fff7fff, v4 +; GFX942-NEXT: v_cndmask_b32_e32 v8, v12, v8, vcc +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, |v0|, |v2| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v12, v7, vcc +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, |v1|, |v3| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v12, v6, vcc +; GFX942-NEXT: v_perm_b32 v2, v8, v1, s0 +; GFX942-NEXT: v_pk_min_f16 v2, v2, v11 +; GFX942-NEXT: v_perm_b32 v6, v9, v0, s0 +; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v8, |v5| src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_pk_min_f16 v6, v6, v10 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v7, 16, v6 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v9, |v4| src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v7, v12, v7, vcc +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, v1, |v5| +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v12, v2, vcc +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, v0, |v4| +; GFX942-NEXT: v_perm_b32 v1, v3, v1, s0 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v12, v6, vcc +; GFX942-NEXT: v_perm_b32 v0, v7, v0, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_v4f16__fabs_all: ; GFX950: ; %bb.0: @@ -2733,48 +2733,48 @@ define <4 x half> @v_fminimum3_v4f16__fneg_all(<4 x half> %a, <4 x half> %b, <4 ; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_v4f16__fneg_all: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_min_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1] -; GFX940-NEXT: v_mov_b32_e32 v7, 0x7e00 -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, -v0, -v2 -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v6, 16, v6 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: v_pk_min_f16 v2, v1, v3 neg_lo:[1,1] neg_hi:[1,1] -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, -v1, -v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX940-NEXT: v_perm_b32 v2, v1, v6, s0 -; GFX940-NEXT: v_pk_min_f16 v2, v2, v5 neg_lo:[0,1] neg_hi:[0,1] -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, v6, -v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, -v5 src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX940-NEXT: v_perm_b32 v2, v0, v8, s0 -; GFX940-NEXT: v_pk_min_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1] -; GFX940-NEXT: v_cmp_o_f16_e64 vcc, v8, -v4 -; GFX940-NEXT: v_perm_b32 v1, v1, v3, s0 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v7, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc -; GFX940-NEXT: v_perm_b32 v0, v0, v5, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_v4f16__fneg_all: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_min_f16 v6, v0, v2 neg_lo:[1,1] neg_hi:[1,1] +; GFX942-NEXT: v_mov_b32_e32 v7, 0x7e00 +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, -v0, -v2 +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v8, v7, v6, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v6, 16, v6 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, -v0, -v2 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: v_pk_min_f16 v2, v1, v3 neg_lo:[1,1] neg_hi:[1,1] +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, -v1, -v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v7, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, -v1, -v3 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX942-NEXT: v_perm_b32 v2, v1, v6, s0 +; GFX942-NEXT: v_pk_min_f16 v2, v2, v5 neg_lo:[0,1] neg_hi:[0,1] +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, v6, -v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v7, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v1, -v5 src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX942-NEXT: v_perm_b32 v2, v0, v8, s0 +; GFX942-NEXT: v_pk_min_f16 v2, v2, v4 neg_lo:[0,1] neg_hi:[0,1] +; GFX942-NEXT: v_cmp_o_f16_e64 vcc, v8, -v4 +; GFX942-NEXT: v_perm_b32 v1, v1, v3, s0 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v7, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, -v4 src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX942-NEXT: v_perm_b32 v0, v0, v5, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_v4f16__fneg_all: ; GFX950: ; %bb.0: @@ -2807,46 +2807,46 @@ define <4 x half> @v_fminimum3_v4f16__inlineimm1(<4 x half> %a, <4 x half> %c) { ; GFX12-NEXT: v_pk_minimum_f16 v1, v1, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_v4f16__inlineimm1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_min_f16 v4, v0, 2.0 op_sel_hi:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v6, 0x7e00 -; GFX940-NEXT: v_lshrrev_b32_e32 v5, 16, v4 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: v_pk_min_f16 v7, v1, 2.0 op_sel_hi:[1,0] -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v8, 16, v7 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v1 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v8, v6, v8, vcc -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v6, v7, vcc -; GFX940-NEXT: v_perm_b32 v4, v8, v1, s0 -; GFX940-NEXT: v_pk_min_f16 v4, v4, v3 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v8, v3 src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_perm_b32 v8, v5, v0, s0 -; GFX940-NEXT: v_lshrrev_b32_e32 v7, 16, v4 -; GFX940-NEXT: v_pk_min_f16 v8, v8, v2 -; GFX940-NEXT: v_cndmask_b32_e32 v7, v6, v7, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v9, 16, v8 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v5, v2 src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v9, vcc -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v6, v4, vcc -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 -; GFX940-NEXT: v_perm_b32 v1, v7, v1, s0 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v6, v8, vcc -; GFX940-NEXT: v_perm_b32 v0, v5, v0, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_v4f16__inlineimm1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_min_f16 v4, v0, 2.0 op_sel_hi:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v6, 0x7e00 +; GFX942-NEXT: v_lshrrev_b32_e32 v5, 16, v4 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v0 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: v_pk_min_f16 v7, v1, 2.0 op_sel_hi:[1,0] +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v8, 16, v7 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v1, v1 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v8, v6, v8, vcc +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v1, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v6, v7, vcc +; GFX942-NEXT: v_perm_b32 v4, v8, v1, s0 +; GFX942-NEXT: v_pk_min_f16 v4, v4, v3 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v8, v3 src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_perm_b32 v8, v5, v0, s0 +; GFX942-NEXT: v_lshrrev_b32_e32 v7, 16, v4 +; GFX942-NEXT: v_pk_min_f16 v8, v8, v2 +; GFX942-NEXT: v_cndmask_b32_e32 v7, v6, v7, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v9, 16, v8 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v5, v2 src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v9, vcc +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v6, v4, vcc +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 +; GFX942-NEXT: v_perm_b32 v1, v7, v1, s0 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v6, v8, vcc +; GFX942-NEXT: v_perm_b32 v0, v5, v0, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_v4f16__inlineimm1: ; GFX950: ; %bb.0: @@ -2876,48 +2876,48 @@ define <4 x half> @v_fminimum3_v4f16__inlineimm2(<4 x half> %a, <4 x half> %b) { ; GFX12-NEXT: v_pk_minimum_f16 v1, v1, 4.0 op_sel_hi:[1,0] ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_fminimum3_v4f16__inlineimm2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_min_f16 v4, v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, 0x7e00 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v4, 16, v4 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: v_pk_min_f16 v2, v1, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc -; GFX940-NEXT: v_perm_b32 v2, v1, v4, s0 -; GFX940-NEXT: v_pk_min_f16 v2, v2, 4.0 op_sel_hi:[1,0] -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v4, v4 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v1, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc -; GFX940-NEXT: v_perm_b32 v2, v0, v6, s0 -; GFX940-NEXT: v_pk_min_f16 v2, v2, 4.0 op_sel_hi:[1,0] -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v6, v6 -; GFX940-NEXT: v_perm_b32 v1, v1, v3, s0 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v2, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc -; GFX940-NEXT: v_perm_b32 v0, v0, v4, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_fminimum3_v4f16__inlineimm2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_min_f16 v4, v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, 0x7e00 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v4, 16, v4 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: v_pk_min_f16 v2, v1, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v1, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v4, v5, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v1, v3 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc +; GFX942-NEXT: v_perm_b32 v2, v1, v4, s0 +; GFX942-NEXT: v_pk_min_f16 v2, v2, 4.0 op_sel_hi:[1,0] +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v4, v4 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v5, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v1, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc +; GFX942-NEXT: v_perm_b32 v2, v0, v6, s0 +; GFX942-NEXT: v_pk_min_f16 v2, v2, 4.0 op_sel_hi:[1,0] +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v6, v6 +; GFX942-NEXT: v_perm_b32 v1, v1, v3, s0 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v4, v5, v2, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc +; GFX942-NEXT: v_perm_b32 v0, v0, v4, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_fminimum3_v4f16__inlineimm2: ; GFX950: ; %bb.0: @@ -3538,19 +3538,19 @@ define <2 x float> @v_no_fminimum3_f32__multi_use(float %a, float %b, float %c) ; GFX12-NEXT: v_minimum_f32 v1, v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_no_fminimum3_f32__multi_use: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_min_f32_e32 v3, v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX940-NEXT: v_min_f32_e32 v1, v0, v2 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_no_fminimum3_f32__multi_use: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_min_f32_e32 v3, v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX942-NEXT: v_min_f32_e32 v1, v0, v2 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_no_fminimum3_f32__multi_use: ; GFX950: ; %bb.0: @@ -3573,22 +3573,22 @@ define amdgpu_ps <2 x i32> @s_no_fminimum3_f32__multi_use(float inreg %a, float ; GFX12-NEXT: s_minimum_f32 s1, s0, s2 ; GFX12-NEXT: ; return to shader part epilog ; -; GFX940-LABEL: s_no_fminimum3_f32__multi_use: -; GFX940: ; %bb.0: -; GFX940-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NEXT: v_min_f32_e32 v1, s0, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX940-NEXT: v_min_f32_e32 v1, s2, v0 -; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s2, v0 -; GFX940-NEXT: v_readfirstlane_b32 s0, v0 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_readfirstlane_b32 s1, v1 -; GFX940-NEXT: ; return to shader part epilog +; GFX942-LABEL: s_no_fminimum3_f32__multi_use: +; GFX942: ; %bb.0: +; GFX942-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NEXT: v_min_f32_e32 v1, s0, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, 0x7fc00000 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX942-NEXT: v_min_f32_e32 v1, s2, v0 +; GFX942-NEXT: v_cmp_o_f32_e32 vcc, s2, v0 +; GFX942-NEXT: v_readfirstlane_b32 s0, v0 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_readfirstlane_b32 s1, v1 +; GFX942-NEXT: ; return to shader part epilog ; ; GFX950-LABEL: s_no_fminimum3_f32__multi_use: ; GFX950: ; %bb.0: @@ -3697,30 +3697,30 @@ define <4 x half> @v_no_fminimum3_v2f16__multi_use(<2 x half> %a, <2 x half> %b, ; GFX12-NEXT: v_pk_minimum_f16 v1, v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_no_fminimum3_v2f16__multi_use: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_pk_min_f16 v3, v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, 0x7e00 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc -; GFX940-NEXT: v_perm_b32 v0, v1, v5, s0 -; GFX940-NEXT: v_pk_min_f16 v3, v0, v2 -; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v5, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3 -; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc -; GFX940-NEXT: v_perm_b32 v1, v1, v5, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_no_fminimum3_v2f16__multi_use: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_pk_min_f16 v3, v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, 0x7e00 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX942-NEXT: v_perm_b32 v0, v1, v5, s0 +; GFX942-NEXT: v_pk_min_f16 v3, v0, v2 +; GFX942-NEXT: v_cmp_o_f16_e32 vcc, v5, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX942-NEXT: v_cmp_o_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX942-NEXT: v_perm_b32 v1, v1, v5, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_no_fminimum3_v2f16__multi_use: ; GFX950: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/fold-agpr-phis.mir b/llvm/test/CodeGen/AMDGPU/fold-agpr-phis.mir index e94546fd5e8a5..a9b3eaf4c33a3 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-agpr-phis.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-agpr-phis.mir @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -run-pass si-fold-operands %s -o - | FileCheck %s --check-prefixes=GFX908 # RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -run-pass si-fold-operands %s -o - | FileCheck %s --check-prefixes=GFX90A -# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -run-pass si-fold-operands %s -o - | FileCheck %s --check-prefixes=GFX90A +# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -run-pass si-fold-operands %s -o - | FileCheck %s --check-prefixes=GFX90A --- name: test_sgpr_init_multiuse diff --git a/llvm/test/CodeGen/AMDGPU/fold-zero-high-bits-clear-kill-flags.mir b/llvm/test/CodeGen/AMDGPU/fold-zero-high-bits-clear-kill-flags.mir index baaca76bfd8a8..b3658080aae07 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-zero-high-bits-clear-kill-flags.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-zero-high-bits-clear-kill-flags.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 -# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -verify-machineinstrs -run-pass si-fold-operands -o - %s | FileCheck -enable-var-scope -check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx942 -verify-machineinstrs -run-pass si-fold-operands -o - %s | FileCheck -enable-var-scope -check-prefix=GCN %s --- name: fold_zero_high_bits_src1_alive diff --git a/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx940.ll b/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx942.ll similarity index 65% rename from llvm/test/CodeGen/AMDGPU/fp-atomics-gfx940.ll rename to llvm/test/CodeGen/AMDGPU/fp-atomics-gfx942.ll index 4216bdf409eda..d683bf4f778b5 100644 --- a/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx940.ll +++ b/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx942.ll @@ -1,22 +1,22 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx940 | FileCheck %s -check-prefix=GFX940 +; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx942 | FileCheck %s -check-prefix=GFX942 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 | FileCheck %s -check-prefix=GFX12 declare <2 x half> @llvm.amdgcn.ds.fadd.v2f16(ptr addrspace(3) %ptr, <2 x half> %data, i32, i32, i1) declare <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(ptr addrspace(3) %ptr, <2 x i16> %data) define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat(ptr %ptr) { -; GFX940-LABEL: flat_atomic_fadd_f32_noret_pat: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX940-NEXT: v_mov_b32_e32 v2, 4.0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: flat_atomic_fadd_f32_noret_pat: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX942-NEXT: v_mov_b32_e32 v2, 4.0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_endpgm ; ; GFX12-LABEL: flat_atomic_fadd_f32_noret_pat: ; GFX12: ; %bb.0: @@ -34,17 +34,17 @@ define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat(ptr %ptr) { } define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat_ieee(ptr %ptr) #0 { -; GFX940-LABEL: flat_atomic_fadd_f32_noret_pat_ieee: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX940-NEXT: v_mov_b32_e32 v2, 4.0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: flat_atomic_fadd_f32_noret_pat_ieee: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX942-NEXT: v_mov_b32_e32 v2, 4.0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_add_f32 v[0:1], v2 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_endpgm ; ; GFX12-LABEL: flat_atomic_fadd_f32_noret_pat_ieee: ; GFX12: ; %bb.0: @@ -61,15 +61,15 @@ define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat_ieee(ptr %ptr) #0 { ret void } define float @flat_atomic_fadd_f32_rtn_pat(ptr %ptr, float %data) { -; GFX940-LABEL: flat_atomic_fadd_f32_rtn_pat: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 4.0 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_atomic_fadd_f32_rtn_pat: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 4.0 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: flat_atomic_fadd_f32_rtn_pat: ; GFX12: ; %bb.0: @@ -90,15 +90,15 @@ define float @flat_atomic_fadd_f32_rtn_pat(ptr %ptr, float %data) { } define amdgpu_kernel void @local_atomic_fadd_v2f16_noret(ptr addrspace(3) %ptr, <2 x half> %data) { -; GFX940-LABEL: local_atomic_fadd_v2f16_noret: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NEXT: ds_pk_add_f16 v0, v1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: local_atomic_fadd_v2f16_noret: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NEXT: ds_pk_add_f16 v0, v1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_endpgm ; ; GFX12-LABEL: local_atomic_fadd_v2f16_noret: ; GFX12: ; %bb.0: @@ -114,12 +114,12 @@ define amdgpu_kernel void @local_atomic_fadd_v2f16_noret(ptr addrspace(3) %ptr, } define <2 x half> @local_atomic_fadd_v2f16_rtn(ptr addrspace(3) %ptr, <2 x half> %data) { -; GFX940-LABEL: local_atomic_fadd_v2f16_rtn: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_pk_add_rtn_f16 v0, v0, v1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fadd_v2f16_rtn: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_pk_add_rtn_f16 v0, v0, v1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: local_atomic_fadd_v2f16_rtn: ; GFX12: ; %bb.0: @@ -138,15 +138,15 @@ define <2 x half> @local_atomic_fadd_v2f16_rtn(ptr addrspace(3) %ptr, <2 x half> } define amdgpu_kernel void @local_atomic_fadd_v2bf16_noret(ptr addrspace(3) %ptr, <2 x i16> %data) { -; GFX940-LABEL: local_atomic_fadd_v2bf16_noret: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NEXT: ds_pk_add_bf16 v0, v1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: local_atomic_fadd_v2bf16_noret: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NEXT: ds_pk_add_bf16 v0, v1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_endpgm ; ; GFX12-LABEL: local_atomic_fadd_v2bf16_noret: ; GFX12: ; %bb.0: @@ -162,12 +162,12 @@ define amdgpu_kernel void @local_atomic_fadd_v2bf16_noret(ptr addrspace(3) %ptr, } define <2 x i16> @local_atomic_fadd_v2bf16_rtn(ptr addrspace(3) %ptr, <2 x i16> %data) { -; GFX940-LABEL: local_atomic_fadd_v2bf16_rtn: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_pk_add_rtn_bf16 v0, v0, v1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fadd_v2bf16_rtn: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_pk_add_rtn_bf16 v0, v0, v1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: local_atomic_fadd_v2bf16_rtn: ; GFX12: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll index 4aec2ffead437..7ce49d2966516 100644 --- a/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll +++ b/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx90a -amdgpu-atomic-optimizer-strategy=None | FileCheck %s -check-prefix=GFX90A -; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx940 -amdgpu-atomic-optimizer-strategy=None | FileCheck %s -check-prefix=GFX940 +; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx942 -amdgpu-atomic-optimizer-strategy=None | FileCheck %s -check-prefix=GFX942 declare double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32, i32 immarg) declare double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double, ptr addrspace(8), i32, i32, i32, i32 immarg) @@ -28,16 +28,16 @@ define amdgpu_kernel void @raw_buffer_atomic_add_noret_f64(<4 x i32> %rsrc, doub ; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: raw_buffer_atomic_add_noret_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s8, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s8 -; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: raw_buffer_atomic_add_noret_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s8, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s8 +; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) ret void @@ -51,12 +51,12 @@ define amdgpu_ps void @raw_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, doub ; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: raw_buffer_atomic_add_rtn_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: raw_buffer_atomic_add_rtn_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) store double %ret, ptr undef @@ -79,20 +79,20 @@ define amdgpu_kernel void @raw_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> %rsr ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: raw_buffer_atomic_add_rtn_f64_off4_slc: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s10, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s10 -; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: raw_buffer_atomic_add_rtn_f64_off4_slc: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s10, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s10 +; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -111,16 +111,16 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_add_noret_f64(ptr addrspace(8) ; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: raw_ptr_buffer_atomic_add_noret_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s8, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s8 -; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: raw_ptr_buffer_atomic_add_noret_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s8, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s8 +; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) ret void @@ -134,12 +134,12 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_add_rtn_f64(ptr addrspace(8) inreg ; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: raw_ptr_buffer_atomic_add_rtn_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: raw_ptr_buffer_atomic_add_rtn_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) store double %ret, ptr undef @@ -162,20 +162,20 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_add_rtn_f64_off4_slc(ptr addrsp ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: raw_ptr_buffer_atomic_add_rtn_f64_off4_slc: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s10, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s10 -; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: raw_ptr_buffer_atomic_add_rtn_f64_off4_slc: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s10, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s10 +; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -194,16 +194,16 @@ define amdgpu_kernel void @struct_buffer_atomic_add_noret_f64(<4 x i32> %rsrc, d ; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: struct_buffer_atomic_add_noret_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s8, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s8 -; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: struct_buffer_atomic_add_noret_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s8, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s8 +; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) ret void @@ -217,12 +217,12 @@ define amdgpu_ps void @struct_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, d ; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: struct_buffer_atomic_add_rtn_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: struct_buffer_atomic_add_rtn_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) store double %ret, ptr undef @@ -245,20 +245,20 @@ define amdgpu_kernel void @struct_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> % ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: struct_buffer_atomic_add_rtn_f64_off4_slc: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s10, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s10 -; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: struct_buffer_atomic_add_rtn_f64_off4_slc: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s10, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s10 +; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -277,16 +277,16 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_add_noret_f64(ptr addrspace( ; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: struct_ptr_buffer_atomic_add_noret_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s8, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s8 -; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: struct_ptr_buffer_atomic_add_noret_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s8, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s8 +; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0) ret void @@ -300,12 +300,12 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_add_rtn_f64(ptr addrspace(8) inr ; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: struct_ptr_buffer_atomic_add_rtn_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: struct_ptr_buffer_atomic_add_rtn_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0) store double %ret, ptr undef @@ -328,20 +328,20 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_add_rtn_f64_off4_slc(ptr add ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: struct_ptr_buffer_atomic_add_rtn_f64_off4_slc: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s10, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s10 -; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: struct_ptr_buffer_atomic_add_rtn_f64_off4_slc: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s10, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s10 +; GFX942-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -360,16 +360,16 @@ define amdgpu_kernel void @raw_buffer_atomic_min_noret_f64(<4 x i32> %rsrc, doub ; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: raw_buffer_atomic_min_noret_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s8, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s8 -; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: raw_buffer_atomic_min_noret_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s8, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s8 +; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) ret void @@ -383,12 +383,12 @@ define amdgpu_ps void @raw_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, doub ; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: raw_buffer_atomic_min_rtn_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: raw_buffer_atomic_min_rtn_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) store double %ret, ptr undef @@ -411,20 +411,20 @@ define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> %rsr ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: raw_buffer_atomic_min_rtn_f64_off4_slc: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s10, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s10 -; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: raw_buffer_atomic_min_rtn_f64_off4_slc: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s10, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s10 +; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -443,16 +443,16 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_min_noret_f64(ptr addrspace(8) ; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: raw_ptr_buffer_atomic_min_noret_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s8, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s8 -; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: raw_ptr_buffer_atomic_min_noret_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s8, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s8 +; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) ret void @@ -466,12 +466,12 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_min_rtn_f64(ptr addrspace(8) inreg ; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: raw_ptr_buffer_atomic_min_rtn_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: raw_ptr_buffer_atomic_min_rtn_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) store double %ret, ptr undef @@ -494,20 +494,20 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_min_rtn_f64_off4_slc(ptr addrsp ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s10, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s10 -; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s10, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s10 +; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -526,16 +526,16 @@ define amdgpu_kernel void @struct_buffer_atomic_min_noret_f64(<4 x i32> %rsrc, d ; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: struct_buffer_atomic_min_noret_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s8, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s8 -; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: struct_buffer_atomic_min_noret_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s8, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s8 +; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) ret void @@ -549,12 +549,12 @@ define amdgpu_ps void @struct_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, d ; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: struct_buffer_atomic_min_rtn_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: struct_buffer_atomic_min_rtn_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) store double %ret, ptr undef @@ -577,20 +577,20 @@ define amdgpu_kernel void @struct_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> % ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: struct_buffer_atomic_min_rtn_f64_off4_slc: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s10, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s10 -; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: struct_buffer_atomic_min_rtn_f64_off4_slc: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s10, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s10 +; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -609,16 +609,16 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_min_noret_f64(ptr addrspace( ; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: struct_ptr_buffer_atomic_min_noret_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s8, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s8 -; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: struct_ptr_buffer_atomic_min_noret_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s8, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s8 +; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0) ret void @@ -632,12 +632,12 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_min_rtn_f64(ptr addrspace(8) inr ; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: struct_ptr_buffer_atomic_min_rtn_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: struct_ptr_buffer_atomic_min_rtn_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0) store double %ret, ptr undef @@ -660,20 +660,20 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_min_rtn_f64_off4_slc(ptr add ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: struct_ptr_buffer_atomic_min_rtn_f64_off4_slc: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s10, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s10 -; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: struct_ptr_buffer_atomic_min_rtn_f64_off4_slc: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s10, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s10 +; GFX942-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -692,16 +692,16 @@ define amdgpu_kernel void @raw_buffer_atomic_max_noret_f64(<4 x i32> %rsrc, doub ; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: raw_buffer_atomic_max_noret_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s8, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s8 -; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: raw_buffer_atomic_max_noret_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s8, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s8 +; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) ret void @@ -715,12 +715,12 @@ define amdgpu_ps void @raw_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, doub ; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: raw_buffer_atomic_max_rtn_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: raw_buffer_atomic_max_rtn_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) store double %ret, ptr undef @@ -743,20 +743,20 @@ define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> %rsr ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s10, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s10 -; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s10, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s10 +; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -775,16 +775,16 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_max_noret_f64(ptr addrspace(8) ; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: raw_ptr_buffer_atomic_max_noret_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s8, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s8 -; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: raw_ptr_buffer_atomic_max_noret_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s8, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s8 +; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) ret void @@ -798,12 +798,12 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_max_rtn_f64(ptr addrspace(8) inreg ; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: raw_ptr_buffer_atomic_max_rtn_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: raw_ptr_buffer_atomic_max_rtn_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) store double %ret, ptr undef @@ -826,20 +826,20 @@ define amdgpu_kernel void @raw_ptr_buffer_atomic_max_rtn_f64_off4_slc(ptr addrsp ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s10, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s10 -; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s10, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s10 +; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -858,16 +858,16 @@ define amdgpu_kernel void @struct_buffer_atomic_max_noret_f64(<4 x i32> %rsrc, d ; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: struct_buffer_atomic_max_noret_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s8, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s8 -; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: struct_buffer_atomic_max_noret_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s8, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s8 +; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) ret void @@ -881,12 +881,12 @@ define amdgpu_ps void @struct_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, d ; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: struct_buffer_atomic_max_rtn_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: struct_buffer_atomic_max_rtn_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) store double %ret, ptr undef @@ -909,20 +909,20 @@ define amdgpu_kernel void @struct_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> % ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: struct_buffer_atomic_max_rtn_f64_off4_slc: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s10, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s10 -; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: struct_buffer_atomic_max_rtn_f64_off4_slc: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s10, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s10 +; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -941,16 +941,16 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_max_noret_f64(ptr addrspace( ; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: struct_ptr_buffer_atomic_max_noret_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s8, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s8 -; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: struct_ptr_buffer_atomic_max_noret_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s8, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s8 +; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0) ret void @@ -964,12 +964,12 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_max_rtn_f64(ptr addrspace(8) inr ; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: struct_ptr_buffer_atomic_max_rtn_f64: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: struct_ptr_buffer_atomic_max_rtn_f64: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: flat_store_dwordx2 v[0:1], v[0:1] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0) store double %ret, ptr undef @@ -992,20 +992,20 @@ define amdgpu_kernel void @struct_ptr_buffer_atomic_max_rtn_f64_off4_slc(ptr add ; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: struct_ptr_buffer_atomic_max_rtn_f64_off4_slc: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-NEXT: s_load_dword s10, s[4:5], 0x3c -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX940-NEXT: v_mov_b32_e32 v2, s10 -; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: struct_ptr_buffer_atomic_max_rtn_f64_off4_slc: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-NEXT: s_load_dword s10, s[4:5], 0x3c +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b32_e32 v2, s10 +; GFX942-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2) store double %ret, ptr addrspace(1) %out, align 8 @@ -1027,17 +1027,17 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %pt ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: global_atomic_fadd_f64_noret_pat: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: global_atomic_fadd_f64_noret_pat: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], 4.0 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0 ret void @@ -1056,17 +1056,17 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace( ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: global_atomic_fadd_f64_noret_pat_agent: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: global_atomic_fadd_f64_noret_pat_agent: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], 4.0 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void @@ -1087,17 +1087,17 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: global_atomic_fadd_f64_noret_pat_system: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: global_atomic_fadd_f64_noret_pat_system: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], 4.0 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void @@ -1116,17 +1116,17 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace( ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: global_atomic_fadd_f64_noret_pat_flush: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: global_atomic_fadd_f64_noret_pat_flush: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], 4.0 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void @@ -1145,15 +1145,15 @@ define double @global_atomic_fadd_f64_rtn_pat(ptr addrspace(1) %ptr, double %dat ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_atomic_fadd_f64_rtn_pat: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_atomic_fadd_f64_rtn_pat: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] main_body: %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0 ret double %ret @@ -1170,15 +1170,15 @@ define double @global_atomic_fadd_f64_rtn_pat_agent(ptr addrspace(1) %ptr, doubl ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_atomic_fadd_f64_rtn_pat_agent: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_atomic_fadd_f64_rtn_pat_agent: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] main_body: %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret double %ret @@ -1197,15 +1197,15 @@ define double @global_atomic_fadd_f64_rtn_pat_system(ptr addrspace(1) %ptr, doub ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_atomic_fadd_f64_rtn_pat_system: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_atomic_fadd_f64_rtn_pat_system: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] main_body: %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") seq_cst, !amdgpu.no.fine.grained.memory !0 ret double %ret @@ -1235,17 +1235,17 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(ptr addrs ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: global_atomic_fadd_f64_noret_pat_agent_safe: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: global_atomic_fadd_f64_noret_pat_agent_safe: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], 4.0 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst ret void @@ -1266,17 +1266,17 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(ptr %ptr) #1 { ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[0:1] -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_add_f64 v[2:3], v[0:1] sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: flat_atomic_fadd_f64_noret_pat: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], 4.0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[0:1] +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_add_f64 v[2:3], v[0:1] sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr %ptr, double 4.0 seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 ret void @@ -1295,17 +1295,17 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(ptr %ptr) #1 { ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat_agent: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[0:1] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_add_f64 v[2:3], v[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: flat_atomic_fadd_f64_noret_pat_agent: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], 4.0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[0:1] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_add_f64 v[2:3], v[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 ret void @@ -1326,17 +1326,17 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(ptr %ptr) #1 { ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat_system: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[0:1] -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_add_f64 v[2:3], v[0:1] sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: flat_atomic_fadd_f64_noret_pat_system: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], 4.0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[0:1] +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_add_f64 v[2:3], v[0:1] sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("one-as") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 ret void @@ -1355,15 +1355,15 @@ define double @flat_atomic_fadd_f64_rtn_pat(ptr %ptr) #1 { ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_atomic_fadd_f64_rtn_pat: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_atomic_fadd_f64_rtn_pat: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] main_body: %ret = atomicrmw fadd ptr %ptr, double 4.0 seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 ret double %ret @@ -1380,15 +1380,15 @@ define double @flat_atomic_fadd_f64_rtn_pat_agent(ptr %ptr) #1 { ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_atomic_fadd_f64_rtn_pat_agent: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_atomic_fadd_f64_rtn_pat_agent: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] main_body: %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 ret double %ret @@ -1408,16 +1408,16 @@ define double @flat_atomic_fadd_f64_rtn_pat_system(ptr %ptr) #1 { ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: flat_atomic_fadd_f64_rtn_pat_system: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: flat_atomic_fadd_f64_rtn_pat_system: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] main_body: %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("one-as") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 ret double %ret @@ -1447,17 +1447,17 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(ptr %ptr) { ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat_agent_safe: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[0:1] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: flat_atomic_add_f64 v[2:3], v[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: flat_atomic_fadd_f64_noret_pat_agent_safe: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], 4.0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[0:1] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: flat_atomic_add_f64 v[2:3], v[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1 ret void @@ -1475,16 +1475,16 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret(ptr addrspace(3) %ptr, do ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: local_atomic_fadd_f64_noret: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dword s2, s[4:5], 0x24 -; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2c -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NEXT: ds_add_f64 v2, v[0:1] -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: local_atomic_fadd_f64_noret: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dword s2, s[4:5], 0x24 +; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2c +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NEXT: ds_add_f64 v2, v[0:1] +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0) ret void @@ -1500,14 +1500,14 @@ define double @local_atomic_fadd_f64_rtn(ptr addrspace(3) %ptr, double %data) { ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fadd_f64_rtn: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3] -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fadd_f64_rtn: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3] +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] main_body: %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0) ret double %ret @@ -1525,15 +1525,15 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(ptr addrspace(3) %ptr ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: local_atomic_fadd_f64_noret_pat: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dword s0, s[4:5], 0x24 -; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NEXT: ds_add_f64 v2, v[0:1] -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: local_atomic_fadd_f64_noret_pat: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dword s0, s[4:5], 0x24 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], 4.0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NEXT: ds_add_f64 v2, v[0:1] +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0 ret void @@ -1551,15 +1551,15 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(ptr addrspace(3 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: local_atomic_fadd_f64_noret_pat_flush: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dword s0, s[4:5], 0x24 -; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NEXT: ds_add_f64 v2, v[0:1] -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: local_atomic_fadd_f64_noret_pat_flush: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dword s0, s[4:5], 0x24 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], 4.0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NEXT: ds_add_f64 v2, v[0:1] +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0 ret void @@ -1577,15 +1577,15 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(ptr addrsp ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: local_atomic_fadd_f64_noret_pat_flush_safe: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_load_dword s0, s[4:5], 0x24 -; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NEXT: ds_add_f64 v2, v[0:1] -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: local_atomic_fadd_f64_noret_pat_flush_safe: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_load_dword s0, s[4:5], 0x24 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], 4.0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NEXT: ds_add_f64 v2, v[0:1] +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_endpgm main_body: %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst ret void @@ -1601,13 +1601,13 @@ define double @local_atomic_fadd_f64_rtn_pat(ptr addrspace(3) %ptr, double %data ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fadd_f64_rtn_pat: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 -; GFX940-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3] -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fadd_f64_rtn_pat: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX942-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3] +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] main_body: %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0 ret double %ret @@ -1623,14 +1623,14 @@ define double @local_atomic_fadd_f64_rtn_ieee_unsafe(ptr addrspace(3) %ptr, doub ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fadd_f64_rtn_ieee_unsafe: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3] -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fadd_f64_rtn_ieee_unsafe: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3] +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] main_body: %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0) ret double %ret @@ -1646,14 +1646,14 @@ define double @local_atomic_fadd_f64_rtn_ieee_safe(ptr addrspace(3) %ptr, double ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fadd_f64_rtn_ieee_safe: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3] -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fadd_f64_rtn_ieee_safe: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3] +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] main_body: %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0) ret double %ret diff --git a/llvm/test/CodeGen/AMDGPU/gfx940-hazards.mir b/llvm/test/CodeGen/AMDGPU/gfx942-hazards.mir similarity index 99% rename from llvm/test/CodeGen/AMDGPU/gfx940-hazards.mir rename to llvm/test/CodeGen/AMDGPU/gfx942-hazards.mir index 348beb7bcf3cc..2c760baf8a5eb 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx940-hazards.mir +++ b/llvm/test/CodeGen/AMDGPU/gfx942-hazards.mir @@ -1,4 +1,4 @@ -# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN %s # GCN-LABEL: name: trans32_write_non_trans32_read # GCN: V_RCP_F32 diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll index 8a7762fb4b6c7..8eb6cecd6ba9e 100644 --- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -stop-after=amdgpu-isel -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefixes=GFX908_GFX11_GFX12,GFX908 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefixes=GFX90A_GFX940 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=amdgpu-isel -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefixes=GFX90A_GFX940 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefixes=GFX90A_GFX942 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=amdgpu-isel -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefixes=GFX90A_GFX942 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -stop-after=amdgpu-isel -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefixes=GFX908_GFX11_GFX12,GFX11_GFX12 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -stop-after=amdgpu-isel -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefixes=GFX908_GFX11_GFX12,GFX11_GFX12 %s @@ -18,17 +18,17 @@ define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_atomicrmw(ptr addrspace(1) ; GFX908_GFX11_GFX12-NEXT: GLOBAL_ATOMIC_ADD_F32 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) ; GFX908_GFX11_GFX12-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_no_rtn_atomicrmw - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_f32_no_rtn_atomicrmw + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] + ; GFX90A_GFX942-NEXT: GLOBAL_ATOMIC_ADD_F32 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, float %data syncscope("wavefront") monotonic, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0 ret void } @@ -96,67 +96,67 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa ; GFX908-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; GFX908-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_saddr_no_rtn_atomicrmw - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000) - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec_xnull = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64 = SI_PS_LIVE - ; GFX90A_GFX940-NEXT: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[SI_PS_LIVE]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec - ; GFX90A_GFX940-NEXT: S_BRANCH %bb.1 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: bb.1 (%ir-block.5): - ; GFX90A_GFX940-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub1 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0 - ; GFX90A_GFX940-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX90A_GFX940-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 killed [[COPY6]], [[COPY7]], implicit $exec - ; GFX90A_GFX940-NEXT: [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 killed [[COPY5]], killed [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec - ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec - ; GFX90A_GFX940-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GFX90A_GFX940-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 0, [[COPY]], 0, [[V_MOV_B32_e32_]], killed [[DEF]], implicit $exec - ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, killed [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec - ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, killed [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec - ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, killed [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec - ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, killed [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec - ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, killed [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec - ; GFX90A_GFX940-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, killed [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec - ; GFX90A_GFX940-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 63 - ; GFX90A_GFX940-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 killed [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_1]] - ; GFX90A_GFX940-NEXT: early-clobber %1:sgpr_32 = STRICT_WWM killed [[V_READLANE_B32_]], implicit $exec - ; GFX90A_GFX940-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_MBCNT_HI_U32_B32_e64_]], [[S_MOV_B32_]], implicit $exec - ; GFX90A_GFX940-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_EQ_U32_e64_]], %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec - ; GFX90A_GFX940-NEXT: S_BRANCH %bb.2 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: bb.2 (%ir-block.31): - ; GFX90A_GFX940-NEXT: successors: %bb.3(0x80000000) - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY %1 - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR killed [[V_MOV_B32_e32_1]], [[COPY8]], [[COPY3]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: bb.3.Flow: - ; GFX90A_GFX940-NEXT: successors: %bb.4(0x80000000) - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: SI_END_CF [[SI_IF1]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: bb.4 (%ir-block.33): - ; GFX90A_GFX940-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_f32_saddr_no_rtn_atomicrmw + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000) + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec_xnull = COPY [[REG_SEQUENCE]] + ; GFX90A_GFX942-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64 = SI_PS_LIVE + ; GFX90A_GFX942-NEXT: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[SI_PS_LIVE]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; GFX90A_GFX942-NEXT: S_BRANCH %bb.1 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: bb.1 (%ir-block.5): + ; GFX90A_GFX942-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub1 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0 + ; GFX90A_GFX942-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GFX90A_GFX942-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 killed [[COPY6]], [[COPY7]], implicit $exec + ; GFX90A_GFX942-NEXT: [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 killed [[COPY5]], killed [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec + ; GFX90A_GFX942-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec + ; GFX90A_GFX942-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GFX90A_GFX942-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 0, [[COPY]], 0, [[V_MOV_B32_e32_]], killed [[DEF]], implicit $exec + ; GFX90A_GFX942-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec + ; GFX90A_GFX942-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, killed [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec + ; GFX90A_GFX942-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec + ; GFX90A_GFX942-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, killed [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec + ; GFX90A_GFX942-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec + ; GFX90A_GFX942-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, killed [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec + ; GFX90A_GFX942-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec + ; GFX90A_GFX942-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, killed [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec + ; GFX90A_GFX942-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec + ; GFX90A_GFX942-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, killed [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec + ; GFX90A_GFX942-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec + ; GFX90A_GFX942-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, killed [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec + ; GFX90A_GFX942-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 63 + ; GFX90A_GFX942-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 killed [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_1]] + ; GFX90A_GFX942-NEXT: early-clobber %1:sgpr_32 = STRICT_WWM killed [[V_READLANE_B32_]], implicit $exec + ; GFX90A_GFX942-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_MBCNT_HI_U32_B32_e64_]], [[S_MOV_B32_]], implicit $exec + ; GFX90A_GFX942-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_EQ_U32_e64_]], %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; GFX90A_GFX942-NEXT: S_BRANCH %bb.2 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: bb.2 (%ir-block.31): + ; GFX90A_GFX942-NEXT: successors: %bb.3(0x80000000) + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY %1 + ; GFX90A_GFX942-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR killed [[V_MOV_B32_e32_1]], [[COPY8]], [[COPY3]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: bb.3.Flow: + ; GFX90A_GFX942-NEXT: successors: %bb.4(0x80000000) + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: SI_END_CF [[SI_IF1]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: bb.4 (%ir-block.33): + ; GFX90A_GFX942-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 ; ; GFX11_GFX12-LABEL: name: global_atomic_fadd_f32_saddr_no_rtn_atomicrmw ; GFX11_GFX12: bb.0 (%ir-block.0): diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll index 1fb34abb41a2d..26e0ec9892c46 100644 --- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll @@ -1,22 +1,22 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefixes=GFX90A_GFX940,GFX90A %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=amdgpu-isel -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefixes=GFX90A_GFX940,GFX940 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX90A %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=amdgpu-isel -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX942 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -stop-after=amdgpu-isel -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefix=GFX11 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -stop-after=amdgpu-isel -amdgpu-atomic-optimizer-strategy=DPP < %s | FileCheck -check-prefix=GFX11 %s define amdgpu_ps float @global_atomic_fadd_f32_rtn_atomicrmw(ptr addrspace(1) %ptr, float %data) { - ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_rtn_atomicrmw - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_f32_rtn_atomicrmw + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] + ; GFX90A_GFX942-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; ; GFX11-LABEL: name: global_atomic_fadd_f32_rtn_atomicrmw ; GFX11: bb.0 (%ir-block.0): @@ -117,87 +117,87 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace ; GFX90A-NEXT: $vgpr0 = COPY [[PHI]] ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; - ; GFX940-LABEL: name: global_atomic_fadd_f32_saddr_rtn_atomicrmw - ; GFX940: bb.0 (%ir-block.0): - ; GFX940-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) - ; GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec_xnull = COPY [[REG_SEQUENCE]] - ; GFX940-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64 = SI_PS_LIVE - ; GFX940-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; GFX940-NEXT: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[SI_PS_LIVE]], %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec - ; GFX940-NEXT: S_BRANCH %bb.1 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: bb.1 (%ir-block.5): - ; GFX940-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec - ; GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub1 - ; GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0 - ; GFX940-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX940-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 killed [[COPY6]], [[COPY7]], implicit $exec - ; GFX940-NEXT: [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 killed [[COPY5]], killed [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec - ; GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec - ; GFX940-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GFX940-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 0, [[COPY]], 0, [[V_MOV_B32_e32_]], killed [[DEF1]], implicit $exec - ; GFX940-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec - ; GFX940-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, killed [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec - ; GFX940-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec - ; GFX940-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, killed [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec - ; GFX940-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec - ; GFX940-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, killed [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec - ; GFX940-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec - ; GFX940-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, killed [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec - ; GFX940-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec - ; GFX940-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, killed [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec - ; GFX940-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec - ; GFX940-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, killed [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec - ; GFX940-NEXT: [[V_MOV_B32_dpp6:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_5]], 312, 15, 15, 0, implicit $exec - ; GFX940-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 63 - ; GFX940-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_1]] - ; GFX940-NEXT: early-clobber %2:sgpr_32 = STRICT_WWM killed [[V_READLANE_B32_]], implicit $exec - ; GFX940-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_MBCNT_HI_U32_B32_e64_]], [[S_MOV_B32_]], implicit $exec - ; GFX940-NEXT: [[COPY8:%[0-9]+]]:vreg_1 = COPY [[V_CMP_EQ_U32_e64_]] - ; GFX940-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; GFX940-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64 = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec - ; GFX940-NEXT: S_BRANCH %bb.2 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: bb.2 (%ir-block.32): - ; GFX940-NEXT: successors: %bb.4(0x80000000) - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX940-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY %2 - ; GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN killed [[V_MOV_B32_e32_1]], [[COPY9]], [[COPY3]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) - ; GFX940-NEXT: S_BRANCH %bb.4 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: bb.3.Flow: - ; GFX940-NEXT: successors: %bb.5(0x80000000) - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[DEF]], %bb.0, %8, %bb.4 - ; GFX940-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec - ; GFX940-NEXT: S_BRANCH %bb.5 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: bb.4 (%ir-block.35): - ; GFX940-NEXT: successors: %bb.3(0x80000000) - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[DEF2]], %bb.1, [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]], %bb.2 - ; GFX940-NEXT: SI_END_CF [[SI_IF1]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec - ; GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PHI1]], implicit $exec - ; GFX940-NEXT: early-clobber %45:vgpr_32 = STRICT_WWM [[V_MOV_B32_dpp6]], implicit $exec - ; GFX940-NEXT: [[V_ADD_F32_e64_6:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_READFIRSTLANE_B32_]], 0, killed %45, 0, 0, implicit $mode, implicit $exec - ; GFX940-NEXT: [[COPY10:%[0-9]+]]:sreg_64_xexec = COPY [[COPY8]] - ; GFX940-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX940-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[V_ADD_F32_e64_6]], 0, [[COPY11]], [[COPY10]], implicit $exec - ; GFX940-NEXT: S_BRANCH %bb.3 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: bb.5 (%ir-block.41): - ; GFX940-NEXT: $vgpr0 = COPY [[PHI]] - ; GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; GFX942-LABEL: name: global_atomic_fadd_f32_saddr_rtn_atomicrmw + ; GFX942: bb.0 (%ir-block.0): + ; GFX942-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) + ; GFX942-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec_xnull = COPY [[REG_SEQUENCE]] + ; GFX942-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64 = SI_PS_LIVE + ; GFX942-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; GFX942-NEXT: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[SI_PS_LIVE]], %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; GFX942-NEXT: S_BRANCH %bb.1 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: bb.1 (%ir-block.5): + ; GFX942-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec + ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub1 + ; GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY4]].sub0 + ; GFX942-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GFX942-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 killed [[COPY6]], [[COPY7]], implicit $exec + ; GFX942-NEXT: [[V_MBCNT_HI_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e64 killed [[COPY5]], killed [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec + ; GFX942-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec + ; GFX942-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GFX942-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 0, [[COPY]], 0, [[V_MOV_B32_e32_]], killed [[DEF1]], implicit $exec + ; GFX942-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_SET_INACTIVE_B32_]], 273, 15, 15, 0, implicit $exec + ; GFX942-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_SET_INACTIVE_B32_]], 0, killed [[V_MOV_B32_dpp]], 0, 0, implicit $mode, implicit $exec + ; GFX942-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_]], 274, 15, 15, 0, implicit $exec + ; GFX942-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_]], 0, killed [[V_MOV_B32_dpp1]], 0, 0, implicit $mode, implicit $exec + ; GFX942-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_1]], 276, 15, 15, 0, implicit $exec + ; GFX942-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_1]], 0, killed [[V_MOV_B32_dpp2]], 0, 0, implicit $mode, implicit $exec + ; GFX942-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec + ; GFX942-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, killed [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec + ; GFX942-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_3]], 322, 10, 15, 0, implicit $exec + ; GFX942-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, killed [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec + ; GFX942-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_4]], 323, 12, 15, 0, implicit $exec + ; GFX942-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_4]], 0, killed [[V_MOV_B32_dpp5]], 0, 0, implicit $mode, implicit $exec + ; GFX942-NEXT: [[V_MOV_B32_dpp6:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_ADD_F32_e64_5]], 312, 15, 15, 0, implicit $exec + ; GFX942-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 63 + ; GFX942-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_1]] + ; GFX942-NEXT: early-clobber %2:sgpr_32 = STRICT_WWM killed [[V_READLANE_B32_]], implicit $exec + ; GFX942-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_MBCNT_HI_U32_B32_e64_]], [[S_MOV_B32_]], implicit $exec + ; GFX942-NEXT: [[COPY8:%[0-9]+]]:vreg_1 = COPY [[V_CMP_EQ_U32_e64_]] + ; GFX942-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; GFX942-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64 = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; GFX942-NEXT: S_BRANCH %bb.2 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: bb.2 (%ir-block.32): + ; GFX942-NEXT: successors: %bb.4(0x80000000) + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX942-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY %2 + ; GFX942-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN killed [[V_MOV_B32_e32_1]], [[COPY9]], [[COPY3]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) + ; GFX942-NEXT: S_BRANCH %bb.4 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: bb.3.Flow: + ; GFX942-NEXT: successors: %bb.5(0x80000000) + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[DEF]], %bb.0, %8, %bb.4 + ; GFX942-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; GFX942-NEXT: S_BRANCH %bb.5 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: bb.4 (%ir-block.35): + ; GFX942-NEXT: successors: %bb.3(0x80000000) + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[DEF2]], %bb.1, [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]], %bb.2 + ; GFX942-NEXT: SI_END_CF [[SI_IF1]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PHI1]], implicit $exec + ; GFX942-NEXT: early-clobber %45:vgpr_32 = STRICT_WWM [[V_MOV_B32_dpp6]], implicit $exec + ; GFX942-NEXT: [[V_ADD_F32_e64_6:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_READFIRSTLANE_B32_]], 0, killed %45, 0, 0, implicit $mode, implicit $exec + ; GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_64_xexec = COPY [[COPY8]] + ; GFX942-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX942-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[V_ADD_F32_e64_6]], 0, [[COPY11]], [[COPY10]], implicit $exec + ; GFX942-NEXT: S_BRANCH %bb.3 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: bb.5 (%ir-block.41): + ; GFX942-NEXT: $vgpr0 = COPY [[PHI]] + ; GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; ; GFX11-LABEL: name: global_atomic_fadd_f32_saddr_rtn_atomicrmw ; GFX11: bb.0 (%ir-block.0): diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f64.ll index ba94a53dff03b..fa4e7f87853dd 100644 --- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f64.ll @@ -1,22 +1,22 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -stop-after=amdgpu-isel -amdgpu-atomic-optimizer-strategy=None < %s | FileCheck -check-prefixes=GFX90A_GFX940,GFX90A %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -stop-after=amdgpu-isel -amdgpu-atomic-optimizer-strategy=None < %s | FileCheck -check-prefixes=GFX90A_GFX940,GFX940 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -stop-after=amdgpu-isel -amdgpu-atomic-optimizer-strategy=None < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX90A %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stop-after=amdgpu-isel -amdgpu-atomic-optimizer-strategy=None < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX942 %s define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_atomicrmw(ptr addrspace(1) %ptr, double %data) { - ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_no_rtn_atomicrmw - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64 killed [[COPY4]], killed [[COPY5]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_f64_no_rtn_atomicrmw + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] + ; GFX90A_GFX942-NEXT: GLOBAL_ATOMIC_ADD_F64 killed [[COPY4]], killed [[COPY5]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic, !amdgpu.no.fine.grained.memory !0 ret void } @@ -67,66 +67,66 @@ define amdgpu_ps double @global_atomic_fadd_f64_rtn_atomicrmw(ptr addrspace(1) % ; GFX90A-NEXT: $sgpr1 = COPY [[COPY13]] ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 ; - ; GFX940-LABEL: name: global_atomic_fadd_f64_rtn_atomicrmw - ; GFX940: bb.0 (%ir-block.0): - ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX940-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) - ; GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0 - ; GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1 - ; GFX940-NEXT: $sgpr0 = COPY [[COPY6]] - ; GFX940-NEXT: $sgpr1 = COPY [[COPY7]] - ; GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 + ; GFX942-LABEL: name: global_atomic_fadd_f64_rtn_atomicrmw + ; GFX942: bb.0 (%ir-block.0): + ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX942-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX942-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] + ; GFX942-NEXT: [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] + ; GFX942-NEXT: [[GLOBAL_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_RTN killed [[COPY4]], killed [[COPY5]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) + ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub0 + ; GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_RTN]].sub1 + ; GFX942-NEXT: $sgpr0 = COPY [[COPY6]] + ; GFX942-NEXT: $sgpr1 = COPY [[COPY7]] + ; GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic ret double %ret } define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, double %data) { - ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_atomicrmw - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64_SADDR killed [[V_MOV_B32_e32_]], killed [[COPY4]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_atomicrmw + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX942-NEXT: GLOBAL_ATOMIC_ADD_F64_SADDR killed [[V_MOV_B32_e32_]], killed [[COPY4]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic, !amdgpu.no.fine.grained.memory !0 ret void } define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, double %data) { - ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_saddr_rtn_atomicrmw - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN killed [[V_MOV_B32_e32_]], killed [[COPY4]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0 - ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1 - ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[COPY5]] - ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[COPY6]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 + ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_f64_saddr_rtn_atomicrmw + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]] + ; GFX90A_GFX942-NEXT: [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN:%[0-9]+]]:vreg_64_align2 = GLOBAL_ATOMIC_ADD_F64_SADDR_RTN killed [[V_MOV_B32_e32_]], killed [[COPY4]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) + ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub0 + ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_ATOMIC_ADD_F64_SADDR_RTN]].sub1 + ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[COPY5]] + ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[COPY6]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic, !amdgpu.no.fine.grained.memory !0 ret double %ret } diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-no-rtn.ll index 02e425e6d10a8..82cec179e72d2 100644 --- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-no-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-no-rtn.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX908 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX908 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s define amdgpu_ps void @global_atomic_fadd_v2f16_no_rtn(ptr addrspace(1) %ptr, <2 x half> %data) { @@ -20,17 +20,17 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_no_rtn(ptr addrspace(1) %ptr, <2 ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1) ; GFX908-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_no_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_v2f16_no_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] + ; GFX90A_GFX942-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 ret void } @@ -48,17 +48,17 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_saddr_no_rtn(ptr addrspace(1) in ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1) ; GFX908-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_saddr_no_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_v2f16_saddr_no_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX90A_GFX942-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 ret void } @@ -76,17 +76,17 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_no_rtn_flat(ptr addrspace(1) %pt ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1) ; GFX908-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_no_rtn_flat - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_v2f16_no_rtn_flat + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] + ; GFX90A_GFX942-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 killed [[COPY3]], [[COPY]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 ret void } @@ -104,17 +104,17 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_saddr_no_rtn_flat(ptr addrspace( ; GFX908-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1) ; GFX908-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_saddr_no_rtn_flat - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: S_ENDPGM 0 + ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_v2f16_saddr_no_rtn_flat + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX90A_GFX942-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-rtn.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-rtn.ll index 794a52b6900ea..b9e833a7105d8 100644 --- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-rtn.ll @@ -1,73 +1,73 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX940 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_rtn(ptr addrspace(1) %ptr, <2 x half> %data) { - ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_v2f16_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] + ; GFX90A_GFX942-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 ret <2 x half> %ret } define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_saddr_rtn(ptr addrspace(1) inreg %ptr, <2 x half> %data) { - ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_saddr_rtn - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_v2f16_saddr_rtn + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX90A_GFX942-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 ret <2 x half> %ret } define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_rtn_flat(ptr addrspace(1) %ptr, <2 x half> %data) { - ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_rtn_flat - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_v2f16_rtn_flat + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]] + ; GFX90A_GFX942-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_RTN killed [[COPY3]], [[COPY]], 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 ret <2 x half> %ret } define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_saddr_rtn_flat(ptr addrspace(1) inreg %ptr, <2 x half> %data) { - ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_saddr_rtn_flat - ; GFX90A_GFX940: bb.0 (%ir-block.0): - ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 - ; GFX90A_GFX940-NEXT: {{ $}} - ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1) - ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN]] - ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; GFX90A_GFX942-LABEL: name: global_atomic_fadd_v2f16_saddr_rtn_flat + ; GFX90A_GFX942: bb.0 (%ir-block.0): + ; GFX90A_GFX942-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 + ; GFX90A_GFX942-NEXT: {{ $}} + ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A_GFX942-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX90A_GFX942-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN killed [[V_MOV_B32_e32_]], [[COPY]], killed [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, addrspace 1) + ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN]] + ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %data syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 ret <2 x half> %ret } diff --git a/llvm/test/CodeGen/AMDGPU/global-atomicrmw-fadd.ll b/llvm/test/CodeGen/AMDGPU/global-atomicrmw-fadd.ll index 73b4428b03c81..904ef8a4b6579 100644 --- a/llvm/test/CodeGen/AMDGPU/global-atomicrmw-fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/global-atomicrmw-fadd.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s -; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s @@ -27,14 +27,14 @@ define float @global_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory(pt ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -211,14 +211,14 @@ define float @global_agent_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_gr ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -397,14 +397,14 @@ define float @global_agent_atomic_fadd_ret_f32__offset12b_neg__amdgpu_no_fine_gr ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:-2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:-2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -593,14 +593,14 @@ define void @global_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory(p ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f32 v[0:1], v2, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -771,14 +771,14 @@ define void @global_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_g ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:2044 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:2044 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -952,14 +952,14 @@ define void @global_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_g ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:-2048 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:-2048 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -1142,14 +1142,14 @@ define float @global_system_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_g ; GFX12-NEXT: global_inv scope:SCOPE_SYS ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -1331,14 +1331,14 @@ define void @global_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_ ; GFX12-NEXT: global_inv scope:SCOPE_SYS ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:2044 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:2044 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -1514,14 +1514,14 @@ define float @global_agent_atomic_fadd_ret_f32_maybe_remote(ptr addrspace(1) %pt ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_f32_maybe_remote: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_f32_maybe_remote: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_f32_maybe_remote: ; GFX11: ; %bb.0: @@ -1716,14 +1716,14 @@ define float @global_agent_atomic_fadd_ret_f32_maybe_remote__amdgpu_ignore_denor ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_f32_maybe_remote__amdgpu_ignore_denormal_mode: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_f32_maybe_remote__amdgpu_ignore_denormal_mode: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_f32_maybe_remote__amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: @@ -1918,14 +1918,14 @@ define void @global_agent_atomic_fadd_noret_f32_maybe_remote__amdgpu_ignore_deno ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_f32_maybe_remote__amdgpu_ignore_denormal_mode: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:2044 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_f32_maybe_remote__amdgpu_ignore_denormal_mode: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:2044 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_f32_maybe_remote__amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: @@ -2113,14 +2113,14 @@ define float @global_agent_atomic_fadd_ret_f32___amdgpu_no_fine_grained_memory(p ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_f32___amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_f32___amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_f32___amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -2299,14 +2299,14 @@ define float @global_agent_atomic_fadd_ret_f32___amdgpu_no_fine_grained_memory__ ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_f32___amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_f32___amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_f32___amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: @@ -2471,14 +2471,14 @@ define float @global_agent_atomic_fadd_ret_f32_amdgpu_ignore_denormal_mode(ptr a ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_f32_amdgpu_ignore_denormal_mode: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_f32_amdgpu_ignore_denormal_mode: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_f32_amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: @@ -2673,14 +2673,14 @@ define void @global_agent_atomic_fadd_noret_f32_maybe_remote(ptr addrspace(1) %p ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_f32_maybe_remote: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:2044 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_f32_maybe_remote: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:2044 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_f32_maybe_remote: ; GFX11: ; %bb.0: @@ -2868,14 +2868,14 @@ define void @global_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory( ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:2044 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:2044 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -3049,14 +3049,14 @@ define void @global_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory_ ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:2044 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:2044 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: @@ -3204,14 +3204,14 @@ define void @global_agent_atomic_fadd_noret_f32_amdgpu_ignore_denormal_mode(ptr ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_f32_amdgpu_ignore_denormal_mode: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:2044 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_f32_amdgpu_ignore_denormal_mode: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:2044 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_f32_amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: @@ -3399,14 +3399,14 @@ define float @global_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory(ptr addr ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -3599,14 +3599,14 @@ define void @global_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory(ptr add ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f32 v[0:1], v2, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -3791,14 +3791,14 @@ define float @global_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory__amdgpu_ ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: @@ -3991,14 +3991,14 @@ define void @global_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory__amdgpu ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f32 v[0:1], v2, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: @@ -4183,14 +4183,14 @@ define float @global_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory__amdgpu_ ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -4367,14 +4367,14 @@ define void @global_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory__amdgpu ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f32 v[0:1], v2, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -4549,14 +4549,14 @@ define float @global_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_fine_grained_memo ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -4719,14 +4719,14 @@ define float @global_agent_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fi ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -4891,14 +4891,14 @@ define float @global_agent_atomic_fadd_ret_f32__offset12b_neg__ftz__amdgpu_no_fi ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:-2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:-2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -5073,14 +5073,14 @@ define void @global_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_mem ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f32 v[0:1], v2, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -5225,14 +5225,14 @@ define void @global_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_f ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:2044 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:2044 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -5380,14 +5380,14 @@ define void @global_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_f ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:-2048 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:-2048 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -5544,14 +5544,14 @@ define float @global_system_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_f ; GFX12-NEXT: global_inv scope:SCOPE_SYS ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -5719,14 +5719,14 @@ define void @global_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_ ; GFX12-NEXT: global_inv scope:SCOPE_SYS ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:2044 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:2044 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -5876,14 +5876,14 @@ define float @global_agent_atomic_fadd_ret_f32__offset12b_pos__ieee__amdgpu_no_f ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_f32__offset12b_pos__ieee__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_f32__offset12b_pos__ieee__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_f32__offset12b_pos__ieee__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: @@ -6048,14 +6048,14 @@ define void @global_agent_atomic_fadd_noret_f32__offset12b_pos__ieee__amdgpu_no_ ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_f32__offset12b_pos__ieee__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:2044 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_f32__offset12b_pos__ieee__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:2044 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_f32__offset12b_pos__ieee__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: @@ -6203,14 +6203,14 @@ define float @global_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_remote_memory(ptr ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -6403,14 +6403,14 @@ define void @global_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_remote_memory(pt ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f32 v[0:1], v2, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -6595,14 +6595,14 @@ define float @global_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_fine_grained_memo ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -6765,14 +6765,14 @@ define void @global_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_mem ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f32 v[0:1], v2, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f32 v[0:1], v2, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -6938,14 +6938,14 @@ define double @global_agent_atomic_fadd_ret_f64__amdgpu_no_fine_grained_memory(p ; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_f64__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_f64__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_f64__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -7159,14 +7159,14 @@ define double @global_agent_atomic_fadd_ret_f64__offset12b_pos__amdgpu_no_fine_g ; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off offset:2040 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off offset:2040 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -7381,14 +7381,14 @@ define double @global_agent_atomic_fadd_ret_f64__offset12b_neg__amdgpu_no_fine_g ; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_f64__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off offset:-2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_f64__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off offset:-2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_f64__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -7609,14 +7609,14 @@ define void @global_agent_atomic_fadd_noret_f64__amdgpu_no_fine_grained_memory(p ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_f64__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f64 v[0:1], v[2:3], off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_f64__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f64 v[0:1], v[2:3], off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_f64__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -7812,14 +7812,14 @@ define void @global_agent_atomic_fadd_noret_f64__offset12b_pos__amdgpu_no_fine_g ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_f64__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f64 v[0:1], v[2:3], off offset:2040 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_f64__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f64 v[0:1], v[2:3], off offset:2040 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_f64__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -8018,14 +8018,14 @@ define void @global_agent_atomic_fadd_noret_f64__offset12b_neg__amdgpu_no_fine_g ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_f64__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_add_f64 v[0:1], v[2:3], off offset:-2048 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_f64__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_add_f64 v[0:1], v[2:3], off offset:-2048 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_f64__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -8252,38 +8252,38 @@ define half @global_agent_atomic_fadd_ret_f16__amdgpu_no_fine_grained_memory(ptr ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_f16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v3 -; GFX940-NEXT: global_load_dword v4, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB44_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v4 -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v3, v7 -; GFX940-NEXT: v_add_f16_e32 v4, v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v3, v4 -; GFX940-NEXT: v_and_or_b32 v6, v7, v5, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v4, v[0:1], v[6:7], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB44_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_f16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v3 +; GFX942-NEXT: global_load_dword v4, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB44_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v4 +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v3, v7 +; GFX942-NEXT: v_add_f16_e32 v4, v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4 +; GFX942-NEXT: v_and_or_b32 v6, v7, v5, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[6:7], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB44_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_f16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -8592,40 +8592,40 @@ define half @global_agent_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_gra ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_load_dword v4, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v6 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB45_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v4 -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v3, v7 -; GFX940-NEXT: v_add_f16_e32 v4, v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v3, v4 -; GFX940-NEXT: v_and_or_b32 v6, v7, v5, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v4, v[0:1], v[6:7], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB45_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_load_dword v4, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v6 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB45_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v4 +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v3, v7 +; GFX942-NEXT: v_add_f16_e32 v4, v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4 +; GFX942-NEXT: v_and_or_b32 v6, v7, v5, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[6:7], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB45_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -8943,41 +8943,41 @@ define half @global_agent_atomic_fadd_ret_f16__offset12b_neg__amdgpu_no_fine_gra ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_load_dword v4, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v6 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB46_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v4 -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v3, v7 -; GFX940-NEXT: v_add_f16_e32 v4, v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v3, v4 -; GFX940-NEXT: v_and_or_b32 v6, v7, v5, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v4, v[0:1], v[6:7], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB46_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_load_dword v4, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v6 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB46_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v4 +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v3, v7 +; GFX942-NEXT: v_add_f16_e32 v4, v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4 +; GFX942-NEXT: v_and_or_b32 v6, v7, v5, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[6:7], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB46_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -9291,37 +9291,37 @@ define void @global_agent_atomic_fadd_noret_f16__amdgpu_no_fine_grained_memory(p ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_f16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v3 -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v6, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB47_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v3, v5 -; GFX940-NEXT: v_add_f16_e32 v4, v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v3, v4 -; GFX940-NEXT: v_and_or_b32 v4, v5, v6, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB47_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_f16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v3 +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v6, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB47_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v3, v5 +; GFX942-NEXT: v_add_f16_e32 v4, v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4 +; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB47_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_f16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -9619,39 +9619,39 @@ define void @global_agent_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_g ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v6, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB48_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v3, v5 -; GFX940-NEXT: v_add_f16_e32 v4, v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v3, v4 -; GFX940-NEXT: v_and_or_b32 v4, v5, v6, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB48_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v6, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB48_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v3, v5 +; GFX942-NEXT: v_add_f16_e32 v4, v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4 +; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB48_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -9957,40 +9957,40 @@ define void @global_agent_atomic_fadd_noret_f16__offset12b_neg__amdgpu_no_fine_g ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v6, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB49_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v3, v5 -; GFX940-NEXT: v_add_f16_e32 v4, v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v3, v4 -; GFX940-NEXT: v_and_or_b32 v4, v5, v6, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB49_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v6, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB49_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v3, v5 +; GFX942-NEXT: v_add_f16_e32 v4, v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4 +; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB49_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -10285,30 +10285,30 @@ define half @global_agent_atomic_fadd_ret_f16__offset12b_pos__align4__amdgpu_no_ ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2046 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_mov_b32 s2, 0xffff0000 -; GFX940-NEXT: .LBB50_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_add_f16_e32 v3, v5, v2 -; GFX940-NEXT: v_and_or_b32 v4, v5, s2, v3 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2046 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB50_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2046 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_mov_b32 s2, 0xffff0000 +; GFX942-NEXT: .LBB50_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_add_f16_e32 v3, v5, v2 +; GFX942-NEXT: v_and_or_b32 v4, v5, s2, v3 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2046 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB50_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -10544,29 +10544,29 @@ define void @global_agent_atomic_fadd_noret_f16__offset12b__align4_pos__amdgpu_n ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v5, v[0:1], off offset:2046 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_mov_b32 s2, 0xffff0000 -; GFX940-NEXT: .LBB51_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_add_f16_e32 v3, v5, v2 -; GFX940-NEXT: v_and_or_b32 v4, v5, s2, v3 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2046 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB51_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v5, v[0:1], off offset:2046 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_mov_b32 s2, 0xffff0000 +; GFX942-NEXT: .LBB51_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_add_f16_e32 v3, v5, v2 +; GFX942-NEXT: v_and_or_b32 v4, v5, s2, v3 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2046 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB51_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -10811,40 +10811,40 @@ define half @global_system_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_gr ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_load_dword v4, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v6 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB52_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v4 -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v3, v7 -; GFX940-NEXT: v_add_f16_e32 v4, v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v3, v4 -; GFX940-NEXT: v_and_or_b32 v6, v7, v5, v4 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v4, v[0:1], v[6:7], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB52_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_load_dword v4, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v6 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB52_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v4 +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v3, v7 +; GFX942-NEXT: v_add_f16_e32 v4, v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4 +; GFX942-NEXT: v_and_or_b32 v6, v7, v5, v4 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[6:7], off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB52_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -11163,39 +11163,39 @@ define void @global_system_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_ ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v6, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB53_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v3, v5 -; GFX940-NEXT: v_add_f16_e32 v4, v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v3, v4 -; GFX940-NEXT: v_and_or_b32 v4, v5, v6, v4 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB53_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v6, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB53_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v3, v5 +; GFX942-NEXT: v_add_f16_e32 v4, v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4 +; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB53_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -11516,47 +11516,47 @@ define bfloat @global_agent_atomic_fadd_ret_bf16__amdgpu_no_fine_grained_memory( ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_bf16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v3 -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB54_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_add_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v5, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB54_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_bf16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v3 +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB54_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_add_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v5, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB54_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_bf16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -11911,49 +11911,49 @@ define bfloat @global_agent_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_ ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB55_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_add_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v5, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB55_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB55_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_add_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v5, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB55_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -12318,50 +12318,50 @@ define bfloat @global_agent_atomic_fadd_ret_bf16__offset12b_neg__amdgpu_no_fine_ ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB56_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_add_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v5, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB56_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB56_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_add_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v5, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB56_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -12721,46 +12721,46 @@ define void @global_agent_atomic_fadd_noret_bf16__amdgpu_no_fine_grained_memory( ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_bf16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v3 -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v6, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB57_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v4, v3, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_add_f32_e32 v4, v4, v2 -; GFX940-NEXT: v_bfe_u32 v7, v4, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v4 -; GFX940-NEXT: v_add3_u32 v7, v7, v4, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v4, v7, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v4, v5, v6, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB57_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_bf16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v3 +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v6, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB57_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v4, v3, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_add_f32_e32 v4, v4, v2 +; GFX942-NEXT: v_bfe_u32 v7, v4, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v4 +; GFX942-NEXT: v_add3_u32 v7, v7, v4, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v4, v7, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB57_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_bf16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -13104,48 +13104,48 @@ define void @global_agent_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_ ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v4, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v6, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB58_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_add_f32_e32 v2, v2, v6 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v2, v3, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB58_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v4, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v6, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB58_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_add_f32_e32 v2, v2, v6 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB58_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -13498,49 +13498,49 @@ define void @global_agent_atomic_fadd_noret_bf16__offset12b_neg__amdgpu_no_fine_ ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v4, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v6, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB59_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_add_f32_e32 v2, v2, v6 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v2, v3, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB59_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v4, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v6, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB59_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_add_f32_e32 v2, v2, v6 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB59_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -13882,40 +13882,40 @@ define bfloat @global_agent_atomic_fadd_ret_bf16__offset12b_pos__align4__amdgpu_ ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2046 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: s_mov_b32 s3, 0xffff0000 -; GFX940-NEXT: .LBB60_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v5 -; GFX940-NEXT: v_add_f32_e32 v3, v3, v2 -; GFX940-NEXT: v_bfe_u32 v4, v3, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v3 -; GFX940-NEXT: v_add3_u32 v4, v4, v3, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v6, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3 -; GFX940-NEXT: v_and_or_b32 v4, v5, s3, v3 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2046 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB60_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2046 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: s_mov_b32 s3, 0xffff0000 +; GFX942-NEXT: .LBB60_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v5 +; GFX942-NEXT: v_add_f32_e32 v3, v3, v2 +; GFX942-NEXT: v_bfe_u32 v4, v3, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v3 +; GFX942-NEXT: v_add3_u32 v4, v4, v3, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v4, v6, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX942-NEXT: v_and_or_b32 v4, v5, s3, v3 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2046 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB60_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -14204,39 +14204,39 @@ define void @global_agent_atomic_fadd_noret_bf16__offset12b__align4_pos__amdgpu_ ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2046 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: s_mov_b32 s3, 0xffff0000 -; GFX940-NEXT: .LBB61_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v3 -; GFX940-NEXT: v_add_f32_e32 v2, v2, v4 -; GFX940-NEXT: v_bfe_u32 v5, v2, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v2 -; GFX940-NEXT: v_add3_u32 v5, v5, v2, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_and_or_b32 v2, v3, s3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2046 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB61_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2046 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: s_mov_b32 s3, 0xffff0000 +; GFX942-NEXT: .LBB61_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; GFX942-NEXT: v_add_f32_e32 v2, v2, v4 +; GFX942-NEXT: v_bfe_u32 v5, v2, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v2 +; GFX942-NEXT: v_add3_u32 v5, v5, v2, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_and_or_b32 v2, v3, s3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2046 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB61_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -14534,49 +14534,49 @@ define bfloat @global_system_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB62_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_add_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v5, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB62_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB62_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_add_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v5, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB62_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -14942,48 +14942,48 @@ define void @global_system_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v4, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v6, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB63_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_add_f32_e32 v2, v2, v6 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v2, v3, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB63_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v4, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v6, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB63_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_add_f32_e32 v2, v2, v6 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB63_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -15301,14 +15301,14 @@ define <2 x half> @global_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_me ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_pk_add_f16 v0, v[0:1], v2, off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_pk_add_f16 v0, v[0:1], v2, off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -15533,14 +15533,14 @@ define <2 x half> @global_agent_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_ ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_pk_add_f16 v0, v[0:1], v2, off offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_pk_add_f16 v0, v[0:1], v2, off offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -15767,14 +15767,14 @@ define <2 x half> @global_agent_atomic_fadd_ret_v2f16__offset12b_neg__amdgpu_no_ ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_pk_add_f16 v0, v[0:1], v2, off offset:-2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_pk_add_f16 v0, v[0:1], v2, off offset:-2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -16005,14 +16005,14 @@ define void @global_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -16215,14 +16215,14 @@ define void @global_agent_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off offset:2044 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off offset:2044 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -16428,14 +16428,14 @@ define void @global_agent_atomic_fadd_noret_v2f16__offset12b_neg__amdgpu_no_fine ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off offset:-2048 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off offset:-2048 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -16650,14 +16650,14 @@ define <2 x half> @global_system_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no ; GFX12-NEXT: global_inv scope:SCOPE_SYS ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_pk_add_f16 v0, v[0:1], v2, off offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_pk_add_f16 v0, v[0:1], v2, off offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -16887,14 +16887,14 @@ define void @global_system_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fin ; GFX12-NEXT: global_inv scope:SCOPE_SYS ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off offset:2044 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off offset:2044 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -17102,14 +17102,14 @@ define <2 x half> @global_agent_atomic_fadd_ret_v2f16__amdgpu_no_remote_memory(p ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_v2f16__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_pk_add_f16 v0, v[0:1], v2, off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_v2f16__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_pk_add_f16 v0, v[0:1], v2, off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_v2f16__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -17348,14 +17348,14 @@ define void @global_agent_atomic_fadd_noret_v2f16__amdgpu_no_remote_memory(ptr a ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_v2f16__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_v2f16__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_v2f16__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -17584,14 +17584,14 @@ define <2 x half> @global_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_me ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_pk_add_f16 v0, v[0:1], v2, off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_pk_add_f16 v0, v[0:1], v2, off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -17816,14 +17816,14 @@ define void @global_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -18026,14 +18026,14 @@ define <2 x half> @global_agent_atomic_fadd_ret_v2f16__maybe_remote(ptr addrspac ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_v2f16__maybe_remote: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_pk_add_f16 v0, v[0:1], v2, off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_v2f16__maybe_remote: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_pk_add_f16 v0, v[0:1], v2, off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_v2f16__maybe_remote: ; GFX11: ; %bb.0: @@ -18272,14 +18272,14 @@ define void @global_agent_atomic_fadd_noret_v2f16__maybe_remote(ptr addrspace(1) ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_v2f16__maybe_remote: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_v2f16__maybe_remote: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_v2f16__maybe_remote: ; GFX11: ; %bb.0: @@ -18512,14 +18512,14 @@ define <2 x bfloat> @global_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_pk_add_bf16 v0, v[0:1], v2, off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_pk_add_bf16 v0, v[0:1], v2, off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -18840,14 +18840,14 @@ define <2 x bfloat> @global_agent_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_ ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_pk_add_bf16 v0, v[0:1], v2, off offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_pk_add_bf16 v0, v[0:1], v2, off offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -19170,14 +19170,14 @@ define <2 x bfloat> @global_agent_atomic_fadd_ret_v2bf16__offset12b_neg__amdgpu_ ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_pk_add_bf16 v0, v[0:1], v2, off offset:-2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_pk_add_bf16 v0, v[0:1], v2, off offset:-2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -19504,14 +19504,14 @@ define void @global_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memor ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_pk_add_bf16 v[0:1], v2, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_pk_add_bf16 v[0:1], v2, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -19822,14 +19822,14 @@ define void @global_agent_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fin ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_pk_add_bf16 v[0:1], v2, off offset:2044 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_pk_add_bf16 v[0:1], v2, off offset:2044 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -20143,14 +20143,14 @@ define void @global_agent_atomic_fadd_noret_v2bf16__offset12b_neg__amdgpu_no_fin ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_pk_add_bf16 v[0:1], v2, off offset:-2048 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_pk_add_bf16 v[0:1], v2, off offset:-2048 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -20473,14 +20473,14 @@ define <2 x bfloat> @global_system_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu ; GFX12-NEXT: global_inv scope:SCOPE_SYS ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_pk_add_bf16 v0, v[0:1], v2, off offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_pk_add_bf16 v0, v[0:1], v2, off offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -20806,14 +20806,14 @@ define void @global_system_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fi ; GFX12-NEXT: global_inv scope:SCOPE_SYS ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_pk_add_bf16 v[0:1], v2, off offset:2044 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_pk_add_bf16 v[0:1], v2, off offset:2044 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -21129,14 +21129,14 @@ define <2 x bfloat> @global_agent_atomic_fadd_ret_v2bf16__amdgpu_no_remote_memor ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_v2bf16__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_pk_add_bf16 v0, v[0:1], v2, off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_v2bf16__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_pk_add_bf16 v0, v[0:1], v2, off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_v2bf16__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -21457,14 +21457,14 @@ define void @global_agent_atomic_fadd_noret_v2bf16__amdgpu_no_remote_memory(ptr ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_v2bf16__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_pk_add_bf16 v[0:1], v2, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_v2bf16__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_pk_add_bf16 v[0:1], v2, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_v2bf16__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -21775,14 +21775,14 @@ define <2 x bfloat> @global_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_pk_add_bf16 v0, v[0:1], v2, off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_pk_add_bf16 v0, v[0:1], v2, off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -22103,14 +22103,14 @@ define void @global_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memor ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_pk_add_bf16 v[0:1], v2, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_pk_add_bf16 v[0:1], v2, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -22421,14 +22421,14 @@ define <2 x bfloat> @global_agent_atomic_fadd_ret_v2bf16__maybe_remote(ptr addrs ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_ret_v2bf16__maybe_remote: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_pk_add_bf16 v0, v[0:1], v2, off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_ret_v2bf16__maybe_remote: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_pk_add_bf16 v0, v[0:1], v2, off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_ret_v2bf16__maybe_remote: ; GFX11: ; %bb.0: @@ -22749,14 +22749,14 @@ define void @global_agent_atomic_fadd_noret_v2bf16__maybe_remote(ptr addrspace(1 ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fadd_noret_v2bf16__maybe_remote: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_pk_add_bf16 v[0:1], v2, off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fadd_noret_v2bf16__maybe_remote: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_pk_add_bf16 v[0:1], v2, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fadd_noret_v2bf16__maybe_remote: ; GFX11: ; %bb.0: @@ -23079,25 +23079,25 @@ define amdgpu_kernel void @infer_as_before_atomic(ptr addrspace(4) %arg) #1 { ; GFX12-NEXT: .LBB92_2: ; GFX12-NEXT: s_endpgm ; -; GFX940-LABEL: infer_as_before_atomic: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_mov_b64 s[0:1], exec -; GFX940-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 -; GFX940-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX940-NEXT: s_and_saveexec_b64 s[2:3], vcc -; GFX940-NEXT: s_cbranch_execz .LBB92_2 -; GFX940-NEXT: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_cvt_f32_ubyte0_e32 v1, s0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: global_atomic_add_f32 v0, v1, s[2:3] -; GFX940-NEXT: .LBB92_2: -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: infer_as_before_atomic: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_mov_b64 s[0:1], exec +; GFX942-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 +; GFX942-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc +; GFX942-NEXT: s_cbranch_execz .LBB92_2 +; GFX942-NEXT: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NEXT: s_bcnt1_i32_b64 s0, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_cvt_f32_ubyte0_e32 v1, s0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: global_atomic_add_f32 v0, v1, s[2:3] +; GFX942-NEXT: .LBB92_2: +; GFX942-NEXT: s_endpgm ; ; GFX11-LABEL: infer_as_before_atomic: ; GFX11: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/global-atomicrmw-fmax.ll b/llvm/test/CodeGen/AMDGPU/global-atomicrmw-fmax.ll index cd6ed1e6b98c2..e8d73914ad302 100644 --- a/llvm/test/CodeGen/AMDGPU/global-atomicrmw-fmax.ll +++ b/llvm/test/CodeGen/AMDGPU/global-atomicrmw-fmax.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s -; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s @@ -27,30 +27,30 @@ define float @global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory(pt ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB0_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_max_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB0_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB0_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_max_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB0_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -188,30 +188,30 @@ define float @global_agent_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_gr ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB1_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_max_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB1_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB1_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_max_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB1_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -351,30 +351,30 @@ define float @global_agent_atomic_fmax_ret_f32__offset12b_neg__amdgpu_no_fine_gr ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:-2048 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB2_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_max_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:-2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB2_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:-2048 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB2_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_max_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:-2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB2_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -514,29 +514,29 @@ define void @global_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory(p ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v4, v2, v2 -; GFX940-NEXT: .LBB3_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB3_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v4, v2, v2 +; GFX942-NEXT: .LBB3_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB3_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -669,29 +669,29 @@ define void @global_agent_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_g ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v4, v2, v2 -; GFX940-NEXT: .LBB4_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB4_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v4, v2, v2 +; GFX942-NEXT: .LBB4_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB4_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -827,29 +827,29 @@ define void @global_agent_atomic_fmax_noret_f32__offset12b_neg__amdgpu_no_fine_g ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:-2048 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v4, v2, v2 -; GFX940-NEXT: .LBB5_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB5_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:-2048 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v4, v2, v2 +; GFX942-NEXT: .LBB5_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB5_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -986,30 +986,30 @@ define float @global_system_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_g ; GFX12-NEXT: global_inv scope:SCOPE_SYS ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB6_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_max_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB6_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB6_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_max_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB6_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -1152,29 +1152,29 @@ define void @global_system_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_ ; GFX12-NEXT: global_inv scope:SCOPE_SYS ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v4, v2, v2 -; GFX940-NEXT: .LBB7_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB7_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v4, v2, v2 +; GFX942-NEXT: .LBB7_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB7_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -1312,30 +1312,30 @@ define float @global_agent_atomic_fmax_ret_f32__amdgpu_no_remote_memory(ptr addr ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_ret_f32__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB8_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_max_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB8_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_ret_f32__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB8_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_max_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB8_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_ret_f32__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -1542,30 +1542,30 @@ define float @global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory__a ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB9_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_max_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB9_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB9_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_max_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB9_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -1707,30 +1707,30 @@ define float @global_agent_atomic_fmax_ret_f32__ftz__amdgpu_no_fine_grained_memo ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_ret_f32__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB10_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_max_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB10_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_ret_f32__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB10_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_max_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB10_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_ret_f32__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -1868,30 +1868,30 @@ define float @global_agent_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fi ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB11_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_max_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB11_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB11_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_max_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB11_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -2031,30 +2031,30 @@ define float @global_agent_atomic_fmax_ret_f32__offset12b_neg__ftz__amdgpu_no_fi ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:-2048 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB12_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_max_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:-2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB12_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:-2048 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB12_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_max_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:-2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB12_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -2194,29 +2194,29 @@ define void @global_agent_atomic_fmax_noret_f32__ftz__amdgpu_no_fine_grained_mem ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_noret_f32__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v4, v2, v2 -; GFX940-NEXT: .LBB13_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB13_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_noret_f32__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v4, v2, v2 +; GFX942-NEXT: .LBB13_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB13_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_noret_f32__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -2349,29 +2349,29 @@ define void @global_agent_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_f ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v4, v2, v2 -; GFX940-NEXT: .LBB14_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB14_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v4, v2, v2 +; GFX942-NEXT: .LBB14_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB14_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -2507,29 +2507,29 @@ define void @global_agent_atomic_fmax_noret_f32__offset12b_neg__ftz__amdgpu_no_f ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:-2048 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v4, v2, v2 -; GFX940-NEXT: .LBB15_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB15_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:-2048 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v4, v2, v2 +; GFX942-NEXT: .LBB15_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB15_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -2666,30 +2666,30 @@ define float @global_system_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_f ; GFX12-NEXT: global_inv scope:SCOPE_SYS ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB16_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_max_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB16_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB16_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_max_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB16_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -2832,29 +2832,29 @@ define void @global_system_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_ ; GFX12-NEXT: global_inv scope:SCOPE_SYS ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v4, v2, v2 -; GFX940-NEXT: .LBB17_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB17_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v4, v2, v2 +; GFX942-NEXT: .LBB17_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB17_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -3015,14 +3015,14 @@ define double @global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory(p ; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -3187,14 +3187,14 @@ define double @global_agent_atomic_fmax_ret_f64__offset12b_pos__amdgpu_no_fine_g ; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off offset:2040 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off offset:2040 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -3360,14 +3360,14 @@ define double @global_agent_atomic_fmax_ret_f64__offset12b_neg__amdgpu_no_fine_g ; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_ret_f64__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off offset:-2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_ret_f64__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off offset:-2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_ret_f64__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -3532,14 +3532,14 @@ define void @global_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory(p ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_max_f64 v[0:1], v[2:3], off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_max_f64 v[0:1], v[2:3], off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -3694,14 +3694,14 @@ define void @global_agent_atomic_fmax_noret_f64__offset12b_pos__amdgpu_no_fine_g ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_noret_f64__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_max_f64 v[0:1], v[2:3], off offset:2040 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_noret_f64__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_max_f64 v[0:1], v[2:3], off offset:2040 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_noret_f64__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -3859,14 +3859,14 @@ define void @global_agent_atomic_fmax_noret_f64__offset12b_neg__amdgpu_no_fine_g ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_noret_f64__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_max_f64 v[0:1], v[2:3], off offset:-2048 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_noret_f64__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_max_f64 v[0:1], v[2:3], off offset:-2048 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_noret_f64__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -4025,14 +4025,14 @@ define double @global_agent_atomic_fmax_ret_f64__amdgpu_no_remote_memory(ptr add ; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_ret_f64__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_ret_f64__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_ret_f64__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -4273,14 +4273,14 @@ define double @global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory__ ; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -4463,40 +4463,40 @@ define half @global_agent_atomic_fmax_ret_f16__amdgpu_no_fine_grained_memory(ptr ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_ret_f16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v3 -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX940-NEXT: .LBB26_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_e32 v5, v3, v7 -; GFX940-NEXT: v_max_f16_e32 v5, v5, v5 -; GFX940-NEXT: v_max_f16_e32 v5, v5, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v5, v3, v5 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB26_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_ret_f16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v3 +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX942-NEXT: .LBB26_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_e32 v5, v3, v7 +; GFX942-NEXT: v_max_f16_e32 v5, v5, v5 +; GFX942-NEXT: v_max_f16_e32 v5, v5, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v5, v3, v5 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB26_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_ret_f16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -4817,42 +4817,42 @@ define half @global_agent_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_gra ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX940-NEXT: .LBB27_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_e32 v5, v3, v7 -; GFX940-NEXT: v_max_f16_e32 v5, v5, v5 -; GFX940-NEXT: v_max_f16_e32 v5, v5, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v5, v3, v5 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB27_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX942-NEXT: .LBB27_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_e32 v5, v3, v7 +; GFX942-NEXT: v_max_f16_e32 v5, v5, v5 +; GFX942-NEXT: v_max_f16_e32 v5, v5, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v5, v3, v5 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB27_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -5182,43 +5182,43 @@ define half @global_agent_atomic_fmax_ret_f16__offset12b_neg__amdgpu_no_fine_gra ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX940-NEXT: .LBB28_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_e32 v5, v3, v7 -; GFX940-NEXT: v_max_f16_e32 v5, v5, v5 -; GFX940-NEXT: v_max_f16_e32 v5, v5, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v5, v3, v5 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB28_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX942-NEXT: .LBB28_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_e32 v5, v3, v7 +; GFX942-NEXT: v_max_f16_e32 v5, v5, v5 +; GFX942-NEXT: v_max_f16_e32 v5, v5, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v5, v3, v5 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB28_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -5545,39 +5545,39 @@ define void @global_agent_atomic_fmax_noret_f16__amdgpu_no_fine_grained_memory(p ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_noret_f16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v3 -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v6, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX940-NEXT: .LBB29_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v3, v5 -; GFX940-NEXT: v_max_f16_e32 v4, v4, v4 -; GFX940-NEXT: v_max_f16_e32 v4, v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v3, v4 -; GFX940-NEXT: v_and_or_b32 v4, v5, v6, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB29_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_noret_f16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v3 +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v6, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX942-NEXT: .LBB29_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v3, v5 +; GFX942-NEXT: v_max_f16_e32 v4, v4, v4 +; GFX942-NEXT: v_max_f16_e32 v4, v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4 +; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB29_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_noret_f16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -5889,41 +5889,41 @@ define void @global_agent_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_g ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v4, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v6, v2, v2 -; GFX940-NEXT: .LBB30_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v2, v4, v3 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v6 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, v4, v2 -; GFX940-NEXT: v_and_or_b32 v2, v3, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB30_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v4, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v6, v2, v2 +; GFX942-NEXT: .LBB30_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v2, v4, v3 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v6 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, v4, v2 +; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB30_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -6243,42 +6243,42 @@ define void @global_agent_atomic_fmax_noret_f16__offset12b_neg__amdgpu_no_fine_g ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v4, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v6, v2, v2 -; GFX940-NEXT: .LBB31_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v2, v4, v3 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v6 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, v4, v2 -; GFX940-NEXT: v_and_or_b32 v2, v3, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB31_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v4, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v6, v2, v2 +; GFX942-NEXT: .LBB31_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v2, v4, v3 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v6 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, v4, v2 +; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB31_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -6586,32 +6586,32 @@ define half @global_agent_atomic_fmax_ret_f16__offset12b_pos__align4__amdgpu_no_ ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2046 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX940-NEXT: s_mov_b32 s2, 0xffff0000 -; GFX940-NEXT: .LBB32_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f16_e32 v3, v5, v5 -; GFX940-NEXT: v_max_f16_e32 v3, v3, v2 -; GFX940-NEXT: v_and_or_b32 v4, v5, s2, v3 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2046 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB32_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2046 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX942-NEXT: s_mov_b32 s2, 0xffff0000 +; GFX942-NEXT: .LBB32_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f16_e32 v3, v5, v5 +; GFX942-NEXT: v_max_f16_e32 v3, v3, v2 +; GFX942-NEXT: v_and_or_b32 v4, v5, s2, v3 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2046 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB32_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -6860,31 +6860,31 @@ define void @global_agent_atomic_fmax_noret_f16__offset12b__align4_pos__amdgpu_n ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2046 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v4, v2, v2 -; GFX940-NEXT: s_mov_b32 s2, 0xffff0000 -; GFX940-NEXT: .LBB33_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f16_e32 v2, v3, v3 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v4 -; GFX940-NEXT: v_and_or_b32 v2, v3, s2, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2046 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB33_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2046 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v4, v2, v2 +; GFX942-NEXT: s_mov_b32 s2, 0xffff0000 +; GFX942-NEXT: .LBB33_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f16_e32 v2, v3, v3 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v4 +; GFX942-NEXT: v_and_or_b32 v2, v3, s2, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2046 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB33_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -7142,42 +7142,42 @@ define half @global_system_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_gr ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX940-NEXT: .LBB34_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_e32 v5, v3, v7 -; GFX940-NEXT: v_max_f16_e32 v5, v5, v5 -; GFX940-NEXT: v_max_f16_e32 v5, v5, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v5, v3, v5 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB34_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX942-NEXT: .LBB34_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_e32 v5, v3, v7 +; GFX942-NEXT: v_max_f16_e32 v5, v5, v5 +; GFX942-NEXT: v_max_f16_e32 v5, v5, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v5, v3, v5 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB34_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -7509,41 +7509,41 @@ define void @global_system_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_ ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v4, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v6, v2, v2 -; GFX940-NEXT: .LBB35_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v2, v4, v3 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v6 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, v4, v2 -; GFX940-NEXT: v_and_or_b32 v2, v3, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB35_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v4, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v6, v2, v2 +; GFX942-NEXT: .LBB35_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v2, v4, v3 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v6 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, v4, v2 +; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB35_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -7875,47 +7875,47 @@ define bfloat @global_agent_atomic_fmax_ret_bf16__amdgpu_no_fine_grained_memory( ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_ret_bf16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v3 -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB36_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_max_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v5, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB36_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_ret_bf16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v3 +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB36_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_max_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v5, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB36_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_ret_bf16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -8272,49 +8272,49 @@ define bfloat @global_agent_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_ ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB37_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_max_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v5, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB37_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB37_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_max_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v5, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB37_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -8681,50 +8681,50 @@ define bfloat @global_agent_atomic_fmax_ret_bf16__offset12b_neg__amdgpu_no_fine_ ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB38_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_max_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v5, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB38_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB38_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_max_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v5, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB38_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -9086,46 +9086,46 @@ define void @global_agent_atomic_fmax_noret_bf16__amdgpu_no_fine_grained_memory( ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_noret_bf16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v3 -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v6, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB39_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v4, v3, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_max_f32_e32 v4, v4, v2 -; GFX940-NEXT: v_bfe_u32 v7, v4, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v4 -; GFX940-NEXT: v_add3_u32 v7, v7, v4, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v4, v7, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v4, v5, v6, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB39_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_noret_bf16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v3 +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v6, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB39_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v4, v3, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_max_f32_e32 v4, v4, v2 +; GFX942-NEXT: v_bfe_u32 v7, v4, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v4 +; GFX942-NEXT: v_add3_u32 v7, v7, v4, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v4, v7, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB39_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_noret_bf16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -9471,48 +9471,48 @@ define void @global_agent_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_ ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v4, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v6, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB40_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v6 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v2, v3, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB40_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v4, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v6, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB40_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v6 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB40_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -9867,49 +9867,49 @@ define void @global_agent_atomic_fmax_noret_bf16__offset12b_neg__amdgpu_no_fine_ ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v4, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v6, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB41_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v6 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v2, v3, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB41_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v4, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v6, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB41_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v6 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB41_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -10253,40 +10253,40 @@ define bfloat @global_agent_atomic_fmax_ret_bf16__offset12b_pos__align4__amdgpu_ ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2046 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: s_mov_b32 s3, 0xffff0000 -; GFX940-NEXT: .LBB42_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v5 -; GFX940-NEXT: v_max_f32_e32 v3, v3, v2 -; GFX940-NEXT: v_bfe_u32 v4, v3, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v3 -; GFX940-NEXT: v_add3_u32 v4, v4, v3, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v6, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3 -; GFX940-NEXT: v_and_or_b32 v4, v5, s3, v3 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2046 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB42_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2046 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: s_mov_b32 s3, 0xffff0000 +; GFX942-NEXT: .LBB42_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v5 +; GFX942-NEXT: v_max_f32_e32 v3, v3, v2 +; GFX942-NEXT: v_bfe_u32 v4, v3, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v3 +; GFX942-NEXT: v_add3_u32 v4, v4, v3, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v4, v6, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX942-NEXT: v_and_or_b32 v4, v5, s3, v3 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2046 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB42_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -10577,39 +10577,39 @@ define void @global_agent_atomic_fmax_noret_bf16__offset12b__align4_pos__amdgpu_ ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2046 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: s_mov_b32 s3, 0xffff0000 -; GFX940-NEXT: .LBB43_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v3 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v4 -; GFX940-NEXT: v_bfe_u32 v5, v2, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v2 -; GFX940-NEXT: v_add3_u32 v5, v5, v2, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_and_or_b32 v2, v3, s3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2046 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB43_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2046 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: s_mov_b32 s3, 0xffff0000 +; GFX942-NEXT: .LBB43_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX942-NEXT: v_bfe_u32 v5, v2, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v2 +; GFX942-NEXT: v_add3_u32 v5, v5, v2, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_and_or_b32 v2, v3, s3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2046 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB43_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -10909,49 +10909,49 @@ define bfloat @global_system_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB44_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_max_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v5, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB44_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB44_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_max_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v5, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB44_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -11319,48 +11319,48 @@ define void @global_system_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v4, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v6, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB45_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v6 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v2, v3, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB45_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v4, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v6, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB45_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v6 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB45_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -11699,31 +11699,31 @@ define <2 x half> @global_agent_atomic_fmax_ret_v2f16__amdgpu_no_fine_grained_me ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_ret_v2f16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX940-NEXT: .LBB46_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_pk_max_f16 v3, v5, v5 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_max_f16 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB46_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_ret_v2f16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX942-NEXT: .LBB46_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_pk_max_f16 v3, v5, v5 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_max_f16 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB46_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_ret_v2f16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -11991,31 +11991,31 @@ define <2 x half> @global_agent_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no_ ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX940-NEXT: .LBB47_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_pk_max_f16 v3, v5, v5 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_max_f16 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB47_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX942-NEXT: .LBB47_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_pk_max_f16 v3, v5, v5 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_max_f16 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB47_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -12285,31 +12285,31 @@ define <2 x half> @global_agent_atomic_fmax_ret_v2f16__offset12b_neg__amdgpu_no_ ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_ret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:-2048 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX940-NEXT: .LBB48_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_pk_max_f16 v3, v5, v5 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_max_f16 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:-2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB48_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_ret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:-2048 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX942-NEXT: .LBB48_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_pk_max_f16 v3, v5, v5 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_max_f16 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:-2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB48_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_ret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -12582,30 +12582,30 @@ define void @global_agent_atomic_fmax_noret_v2f16__amdgpu_no_fine_grained_memory ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_noret_v2f16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v4, v2, v2 -; GFX940-NEXT: .LBB49_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v2, v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_max_f16 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB49_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_noret_v2f16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v4, v2, v2 +; GFX942-NEXT: .LBB49_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v2, v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_max_f16 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB49_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_noret_v2f16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -12863,30 +12863,30 @@ define void @global_agent_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fine ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v4, v2, v2 -; GFX940-NEXT: .LBB50_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v2, v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_max_f16 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB50_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v4, v2, v2 +; GFX942-NEXT: .LBB50_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v2, v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_max_f16 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB50_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -13147,30 +13147,30 @@ define void @global_agent_atomic_fmax_noret_v2f16__offset12b_neg__amdgpu_no_fine ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_noret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:-2048 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v4, v2, v2 -; GFX940-NEXT: .LBB51_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v2, v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_max_f16 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB51_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_noret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:-2048 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v4, v2, v2 +; GFX942-NEXT: .LBB51_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v2, v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_max_f16 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB51_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_noret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -13441,31 +13441,31 @@ define <2 x half> @global_system_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX940-NEXT: .LBB52_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_pk_max_f16 v3, v5, v5 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_max_f16 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB52_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX942-NEXT: .LBB52_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_pk_max_f16 v3, v5, v5 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_max_f16 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB52_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -13737,30 +13737,30 @@ define void @global_system_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fin ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v4, v2, v2 -; GFX940-NEXT: .LBB53_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v2, v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_max_f16 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB53_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v4, v2, v2 +; GFX942-NEXT: .LBB53_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v2, v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_max_f16 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB53_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -14047,47 +14047,47 @@ define <2 x bfloat> @global_agent_atomic_fmax_ret_v2bf16__amdgpu_no_fine_grained ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_ret_v2bf16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB54_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v7 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 -; GFX940-NEXT: v_max_f32_e32 v3, v3, v4 -; GFX940-NEXT: v_max_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v3, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v3 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v3, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v5, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v6, v5, v3, s5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[6:7], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB54_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_ret_v2bf16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB54_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v7 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 +; GFX942-NEXT: v_max_f32_e32 v3, v3, v4 +; GFX942-NEXT: v_max_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v3, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v3 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v3, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v5, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v6, v5, v3, s5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[6:7], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB54_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_ret_v2bf16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -14446,47 +14446,47 @@ define <2 x bfloat> @global_agent_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_ ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB55_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v7 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 -; GFX940-NEXT: v_max_f32_e32 v3, v3, v4 -; GFX940-NEXT: v_max_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v3, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v3 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v3, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v5, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v6, v5, v3, s5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[6:7], off offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB55_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB55_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v7 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 +; GFX942-NEXT: v_max_f32_e32 v3, v3, v4 +; GFX942-NEXT: v_max_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v3, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v3 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v3, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v5, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v6, v5, v3, s5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[6:7], off offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB55_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -14847,47 +14847,47 @@ define <2 x bfloat> @global_agent_atomic_fmax_ret_v2bf16__offset12b_neg__amdgpu_ ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:-2048 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB56_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v7 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 -; GFX940-NEXT: v_max_f32_e32 v3, v3, v4 -; GFX940-NEXT: v_max_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v3, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v3 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v3, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v5, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v6, v5, v3, s5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[6:7], off offset:-2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB56_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:-2048 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB56_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v7 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 +; GFX942-NEXT: v_max_f32_e32 v3, v3, v4 +; GFX942-NEXT: v_max_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v3, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v3 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v3, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v5, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v6, v5, v3, s5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[6:7], off offset:-2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB56_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -15251,46 +15251,46 @@ define void @global_agent_atomic_fmax_noret_v2bf16__amdgpu_no_fine_grained_memor ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_noret_v2bf16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB57_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v3 -; GFX940-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v4 -; GFX940-NEXT: v_max_f32_e32 v6, v6, v5 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v6, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v6 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v6, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v2, v6, v2, s5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB57_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_noret_v2bf16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB57_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; GFX942-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX942-NEXT: v_max_f32_e32 v6, v6, v5 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v6, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v6 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v6, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB57_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_noret_v2bf16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -15638,46 +15638,46 @@ define void @global_agent_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fin ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB58_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v3 -; GFX940-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v4 -; GFX940-NEXT: v_max_f32_e32 v6, v6, v5 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v6, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v6 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v6, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v2, v6, v2, s5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB58_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB58_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; GFX942-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX942-NEXT: v_max_f32_e32 v6, v6, v5 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v6, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v6 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v6, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB58_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -16028,46 +16028,46 @@ define void @global_agent_atomic_fmax_noret_v2bf16__offset12b_neg__amdgpu_no_fin ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmax_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:-2048 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB59_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v3 -; GFX940-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v4 -; GFX940-NEXT: v_max_f32_e32 v6, v6, v5 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v6, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v6 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v6, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v2, v6, v2, s5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB59_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmax_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:-2048 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB59_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; GFX942-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX942-NEXT: v_max_f32_e32 v6, v6, v5 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v6, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v6 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v6, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB59_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmax_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -16428,47 +16428,47 @@ define <2 x bfloat> @global_system_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB60_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v7 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 -; GFX940-NEXT: v_max_f32_e32 v3, v3, v4 -; GFX940-NEXT: v_max_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v3, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v3 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v3, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v5, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v6, v5, v3, s5 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[6:7], off offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB60_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB60_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v7 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 +; GFX942-NEXT: v_max_f32_e32 v3, v3, v4 +; GFX942-NEXT: v_max_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v3, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v3 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v3, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v5, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v6, v5, v3, s5 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[6:7], off offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB60_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -16831,46 +16831,46 @@ define void @global_system_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fi ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB61_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v3 -; GFX940-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v4 -; GFX940-NEXT: v_max_f32_e32 v6, v6, v5 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v6, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v6 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v6, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v2, v6, v2, s5 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB61_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB61_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; GFX942-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX942-NEXT: v_max_f32_e32 v6, v6, v5 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v6, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v6 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v6, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB61_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/global-atomicrmw-fmin.ll b/llvm/test/CodeGen/AMDGPU/global-atomicrmw-fmin.ll index b49047c54d7dd..c1c92906df250 100644 --- a/llvm/test/CodeGen/AMDGPU/global-atomicrmw-fmin.ll +++ b/llvm/test/CodeGen/AMDGPU/global-atomicrmw-fmin.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s -; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s @@ -27,30 +27,30 @@ define float @global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory(pt ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB0_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_min_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB0_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB0_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_min_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB0_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -188,30 +188,30 @@ define float @global_agent_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_gr ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB1_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_min_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB1_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB1_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_min_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB1_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -351,30 +351,30 @@ define float @global_agent_atomic_fmin_ret_f32__offset12b_neg__amdgpu_no_fine_gr ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:-2048 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB2_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_min_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:-2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB2_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:-2048 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB2_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_min_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:-2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB2_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -514,29 +514,29 @@ define void @global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory(p ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v4, v2, v2 -; GFX940-NEXT: .LBB3_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX940-NEXT: v_min_f32_e32 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB3_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v4, v2, v2 +; GFX942-NEXT: .LBB3_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX942-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB3_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -669,29 +669,29 @@ define void @global_agent_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_g ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v4, v2, v2 -; GFX940-NEXT: .LBB4_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX940-NEXT: v_min_f32_e32 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB4_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v4, v2, v2 +; GFX942-NEXT: .LBB4_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX942-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB4_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -827,29 +827,29 @@ define void @global_agent_atomic_fmin_noret_f32__offset12b_neg__amdgpu_no_fine_g ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:-2048 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v4, v2, v2 -; GFX940-NEXT: .LBB5_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX940-NEXT: v_min_f32_e32 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB5_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:-2048 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v4, v2, v2 +; GFX942-NEXT: .LBB5_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX942-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB5_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -986,30 +986,30 @@ define float @global_system_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_g ; GFX12-NEXT: global_inv scope:SCOPE_SYS ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB6_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_min_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB6_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB6_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_min_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB6_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -1152,29 +1152,29 @@ define void @global_system_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_ ; GFX12-NEXT: global_inv scope:SCOPE_SYS ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v4, v2, v2 -; GFX940-NEXT: .LBB7_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX940-NEXT: v_min_f32_e32 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB7_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v4, v2, v2 +; GFX942-NEXT: .LBB7_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX942-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB7_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -1312,30 +1312,30 @@ define float @global_agent_atomic_fmin_ret_f32__amdgpu_no_remote_memory(ptr addr ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_ret_f32__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB8_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_min_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB8_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_ret_f32__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB8_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_min_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB8_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_ret_f32__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -1542,30 +1542,30 @@ define float @global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory__a ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB9_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_min_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB9_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB9_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_min_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB9_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -1707,30 +1707,30 @@ define float @global_agent_atomic_fmin_ret_f32__ftz__amdgpu_no_fine_grained_memo ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_ret_f32__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB10_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_min_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB10_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_ret_f32__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB10_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_min_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB10_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_ret_f32__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -1868,30 +1868,30 @@ define float @global_agent_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_fi ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB11_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_min_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB11_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB11_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_min_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB11_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -2031,30 +2031,30 @@ define float @global_agent_atomic_fmin_ret_f32__offset12b_neg__ftz__amdgpu_no_fi ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:-2048 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB12_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_min_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:-2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB12_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:-2048 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB12_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_min_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:-2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB12_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -2194,29 +2194,29 @@ define void @global_agent_atomic_fmin_noret_f32__ftz__amdgpu_no_fine_grained_mem ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_noret_f32__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v4, v2, v2 -; GFX940-NEXT: .LBB13_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX940-NEXT: v_min_f32_e32 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB13_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_noret_f32__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v4, v2, v2 +; GFX942-NEXT: .LBB13_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX942-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB13_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_noret_f32__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -2349,29 +2349,29 @@ define void @global_agent_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_f ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v4, v2, v2 -; GFX940-NEXT: .LBB14_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX940-NEXT: v_min_f32_e32 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB14_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v4, v2, v2 +; GFX942-NEXT: .LBB14_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX942-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB14_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -2507,29 +2507,29 @@ define void @global_agent_atomic_fmin_noret_f32__offset12b_neg__ftz__amdgpu_no_f ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:-2048 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v4, v2, v2 -; GFX940-NEXT: .LBB15_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX940-NEXT: v_min_f32_e32 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB15_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:-2048 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v4, v2, v2 +; GFX942-NEXT: .LBB15_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX942-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB15_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -2666,30 +2666,30 @@ define float @global_system_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_f ; GFX12-NEXT: global_inv scope:SCOPE_SYS ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX940-NEXT: .LBB16_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX940-NEXT: v_min_f32_e32 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB16_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX942-NEXT: .LBB16_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX942-NEXT: v_min_f32_e32 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB16_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -2832,29 +2832,29 @@ define void @global_system_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_ ; GFX12-NEXT: global_inv scope:SCOPE_SYS ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f32_e32 v4, v2, v2 -; GFX940-NEXT: .LBB17_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX940-NEXT: v_min_f32_e32 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB17_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f32_e32 v4, v2, v2 +; GFX942-NEXT: .LBB17_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX942-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB17_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -3015,14 +3015,14 @@ define double @global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory(p ; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_min_f64 v[0:1], v[0:1], v[2:3], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_min_f64 v[0:1], v[0:1], v[2:3], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -3187,14 +3187,14 @@ define double @global_agent_atomic_fmin_ret_f64__offset12b_pos__amdgpu_no_fine_g ; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_min_f64 v[0:1], v[0:1], v[2:3], off offset:2040 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_min_f64 v[0:1], v[0:1], v[2:3], off offset:2040 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -3360,14 +3360,14 @@ define double @global_agent_atomic_fmin_ret_f64__offset12b_neg__amdgpu_no_fine_g ; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_ret_f64__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_min_f64 v[0:1], v[0:1], v[2:3], off offset:-2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_ret_f64__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_min_f64 v[0:1], v[0:1], v[2:3], off offset:-2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_ret_f64__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -3532,14 +3532,14 @@ define void @global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory(p ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_min_f64 v[0:1], v[2:3], off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_min_f64 v[0:1], v[2:3], off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -3694,14 +3694,14 @@ define void @global_agent_atomic_fmin_noret_f64__offset12b_pos__amdgpu_no_fine_g ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_noret_f64__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_min_f64 v[0:1], v[2:3], off offset:2040 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_noret_f64__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_min_f64 v[0:1], v[2:3], off offset:2040 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_noret_f64__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -3859,14 +3859,14 @@ define void @global_agent_atomic_fmin_noret_f64__offset12b_neg__amdgpu_no_fine_g ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_noret_f64__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_min_f64 v[0:1], v[2:3], off offset:-2048 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_noret_f64__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_min_f64 v[0:1], v[2:3], off offset:-2048 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_noret_f64__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -4025,14 +4025,14 @@ define double @global_agent_atomic_fmin_ret_f64__amdgpu_no_remote_memory(ptr add ; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_ret_f64__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_min_f64 v[0:1], v[0:1], v[2:3], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_ret_f64__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_min_f64 v[0:1], v[0:1], v[2:3], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_ret_f64__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -4273,14 +4273,14 @@ define double @global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory__ ; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_min_f64 v[0:1], v[0:1], v[2:3], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_min_f64 v[0:1], v[0:1], v[2:3], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory: ; GFX11: ; %bb.0: @@ -4463,40 +4463,40 @@ define half @global_agent_atomic_fmin_ret_f16__amdgpu_no_fine_grained_memory(ptr ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_ret_f16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v3 -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX940-NEXT: .LBB26_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_e32 v5, v3, v7 -; GFX940-NEXT: v_max_f16_e32 v5, v5, v5 -; GFX940-NEXT: v_min_f16_e32 v5, v5, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v5, v3, v5 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB26_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_ret_f16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v3 +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX942-NEXT: .LBB26_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_e32 v5, v3, v7 +; GFX942-NEXT: v_max_f16_e32 v5, v5, v5 +; GFX942-NEXT: v_min_f16_e32 v5, v5, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v5, v3, v5 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB26_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_ret_f16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -4817,42 +4817,42 @@ define half @global_agent_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_gra ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX940-NEXT: .LBB27_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_e32 v5, v3, v7 -; GFX940-NEXT: v_max_f16_e32 v5, v5, v5 -; GFX940-NEXT: v_min_f16_e32 v5, v5, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v5, v3, v5 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB27_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX942-NEXT: .LBB27_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_e32 v5, v3, v7 +; GFX942-NEXT: v_max_f16_e32 v5, v5, v5 +; GFX942-NEXT: v_min_f16_e32 v5, v5, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v5, v3, v5 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB27_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -5182,43 +5182,43 @@ define half @global_agent_atomic_fmin_ret_f16__offset12b_neg__amdgpu_no_fine_gra ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX940-NEXT: .LBB28_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_e32 v5, v3, v7 -; GFX940-NEXT: v_max_f16_e32 v5, v5, v5 -; GFX940-NEXT: v_min_f16_e32 v5, v5, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v5, v3, v5 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB28_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX942-NEXT: .LBB28_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_e32 v5, v3, v7 +; GFX942-NEXT: v_max_f16_e32 v5, v5, v5 +; GFX942-NEXT: v_min_f16_e32 v5, v5, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v5, v3, v5 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB28_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -5545,39 +5545,39 @@ define void @global_agent_atomic_fmin_noret_f16__amdgpu_no_fine_grained_memory(p ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_noret_f16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v3 -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v6, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX940-NEXT: .LBB29_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v3, v5 -; GFX940-NEXT: v_max_f16_e32 v4, v4, v4 -; GFX940-NEXT: v_min_f16_e32 v4, v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v3, v4 -; GFX940-NEXT: v_and_or_b32 v4, v5, v6, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB29_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_noret_f16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v3 +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v6, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX942-NEXT: .LBB29_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v3, v5 +; GFX942-NEXT: v_max_f16_e32 v4, v4, v4 +; GFX942-NEXT: v_min_f16_e32 v4, v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4 +; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB29_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_noret_f16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -5889,41 +5889,41 @@ define void @global_agent_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_g ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v4, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v6, v2, v2 -; GFX940-NEXT: .LBB30_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v2, v4, v3 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX940-NEXT: v_min_f16_e32 v2, v2, v6 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, v4, v2 -; GFX940-NEXT: v_and_or_b32 v2, v3, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB30_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v4, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v6, v2, v2 +; GFX942-NEXT: .LBB30_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v2, v4, v3 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX942-NEXT: v_min_f16_e32 v2, v2, v6 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, v4, v2 +; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB30_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -6243,42 +6243,42 @@ define void @global_agent_atomic_fmin_noret_f16__offset12b_neg__amdgpu_no_fine_g ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v4, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v6, v2, v2 -; GFX940-NEXT: .LBB31_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v2, v4, v3 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX940-NEXT: v_min_f16_e32 v2, v2, v6 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, v4, v2 -; GFX940-NEXT: v_and_or_b32 v2, v3, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB31_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v4, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v6, v2, v2 +; GFX942-NEXT: .LBB31_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v2, v4, v3 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX942-NEXT: v_min_f16_e32 v2, v2, v6 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, v4, v2 +; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB31_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -6586,32 +6586,32 @@ define half @global_agent_atomic_fmin_ret_f16__offset12b_pos__align4__amdgpu_no_ ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2046 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX940-NEXT: s_mov_b32 s2, 0xffff0000 -; GFX940-NEXT: .LBB32_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_max_f16_e32 v3, v5, v5 -; GFX940-NEXT: v_min_f16_e32 v3, v3, v2 -; GFX940-NEXT: v_and_or_b32 v4, v5, s2, v3 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2046 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB32_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2046 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX942-NEXT: s_mov_b32 s2, 0xffff0000 +; GFX942-NEXT: .LBB32_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_max_f16_e32 v3, v5, v5 +; GFX942-NEXT: v_min_f16_e32 v3, v3, v2 +; GFX942-NEXT: v_and_or_b32 v4, v5, s2, v3 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2046 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB32_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -6860,31 +6860,31 @@ define void @global_agent_atomic_fmin_noret_f16__offset12b__align4_pos__amdgpu_n ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2046 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v4, v2, v2 -; GFX940-NEXT: s_mov_b32 s2, 0xffff0000 -; GFX940-NEXT: .LBB33_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f16_e32 v2, v3, v3 -; GFX940-NEXT: v_min_f16_e32 v2, v2, v4 -; GFX940-NEXT: v_and_or_b32 v2, v3, s2, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2046 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB33_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2046 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v4, v2, v2 +; GFX942-NEXT: s_mov_b32 s2, 0xffff0000 +; GFX942-NEXT: .LBB33_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_max_f16_e32 v2, v3, v3 +; GFX942-NEXT: v_min_f16_e32 v2, v2, v4 +; GFX942-NEXT: v_and_or_b32 v2, v3, s2, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2046 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB33_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -7142,42 +7142,42 @@ define half @global_system_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_gr ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX940-NEXT: .LBB34_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_e32 v5, v3, v7 -; GFX940-NEXT: v_max_f16_e32 v5, v5, v5 -; GFX940-NEXT: v_min_f16_e32 v5, v5, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v5, v3, v5 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB34_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX942-NEXT: .LBB34_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_e32 v5, v3, v7 +; GFX942-NEXT: v_max_f16_e32 v5, v5, v5 +; GFX942-NEXT: v_min_f16_e32 v5, v5, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v5, v3, v5 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB34_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -7509,41 +7509,41 @@ define void @global_system_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_ ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v4, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_max_f16_e32 v6, v2, v2 -; GFX940-NEXT: .LBB35_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v2, v4, v3 -; GFX940-NEXT: v_max_f16_e32 v2, v2, v2 -; GFX940-NEXT: v_min_f16_e32 v2, v2, v6 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, v4, v2 -; GFX940-NEXT: v_and_or_b32 v2, v3, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB35_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v4, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_max_f16_e32 v6, v2, v2 +; GFX942-NEXT: .LBB35_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v2, v4, v3 +; GFX942-NEXT: v_max_f16_e32 v2, v2, v2 +; GFX942-NEXT: v_min_f16_e32 v2, v2, v6 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, v4, v2 +; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB35_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -7875,47 +7875,47 @@ define bfloat @global_agent_atomic_fmin_ret_bf16__amdgpu_no_fine_grained_memory( ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_ret_bf16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v3 -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB36_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_min_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v5, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB36_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_ret_bf16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v3 +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB36_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_min_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v5, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB36_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_ret_bf16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -8272,49 +8272,49 @@ define bfloat @global_agent_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_ ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB37_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_min_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v5, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB37_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB37_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_min_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v5, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB37_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -8681,50 +8681,50 @@ define bfloat @global_agent_atomic_fmin_ret_bf16__offset12b_neg__amdgpu_no_fine_ ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB38_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_min_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v5, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB38_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB38_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_min_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v5, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB38_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -9086,46 +9086,46 @@ define void @global_agent_atomic_fmin_noret_bf16__amdgpu_no_fine_grained_memory( ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_noret_bf16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v3 -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v6, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB39_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v4, v3, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_min_f32_e32 v4, v4, v2 -; GFX940-NEXT: v_bfe_u32 v7, v4, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v4 -; GFX940-NEXT: v_add3_u32 v7, v7, v4, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v4, v7, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v4, v5, v6, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB39_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_noret_bf16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v3 +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v6, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB39_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v4, v3, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_min_f32_e32 v4, v4, v2 +; GFX942-NEXT: v_bfe_u32 v7, v4, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v4 +; GFX942-NEXT: v_add3_u32 v7, v7, v4, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v4, v7, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB39_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_noret_bf16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -9471,48 +9471,48 @@ define void @global_agent_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_ ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v4, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v6, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB40_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_min_f32_e32 v2, v2, v6 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v2, v3, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB40_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v4, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v6, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB40_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_min_f32_e32 v2, v2, v6 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB40_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -9867,49 +9867,49 @@ define void @global_agent_atomic_fmin_noret_bf16__offset12b_neg__amdgpu_no_fine_ ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v4, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v6, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB41_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_min_f32_e32 v2, v2, v6 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v2, v3, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB41_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v4, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v6, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB41_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_min_f32_e32 v2, v2, v6 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB41_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -10253,40 +10253,40 @@ define bfloat @global_agent_atomic_fmin_ret_bf16__offset12b_pos__align4__amdgpu_ ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2046 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: s_mov_b32 s3, 0xffff0000 -; GFX940-NEXT: .LBB42_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v5 -; GFX940-NEXT: v_min_f32_e32 v3, v3, v2 -; GFX940-NEXT: v_bfe_u32 v4, v3, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v3 -; GFX940-NEXT: v_add3_u32 v4, v4, v3, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v6, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3 -; GFX940-NEXT: v_and_or_b32 v4, v5, s3, v3 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2046 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB42_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2046 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: s_mov_b32 s3, 0xffff0000 +; GFX942-NEXT: .LBB42_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v5 +; GFX942-NEXT: v_min_f32_e32 v3, v3, v2 +; GFX942-NEXT: v_bfe_u32 v4, v3, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v3 +; GFX942-NEXT: v_add3_u32 v4, v4, v3, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v4, v6, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX942-NEXT: v_and_or_b32 v4, v5, s3, v3 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2046 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB42_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -10577,39 +10577,39 @@ define void @global_agent_atomic_fmin_noret_bf16__offset12b__align4_pos__amdgpu_ ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2046 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: s_mov_b32 s3, 0xffff0000 -; GFX940-NEXT: .LBB43_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v3 -; GFX940-NEXT: v_min_f32_e32 v2, v2, v4 -; GFX940-NEXT: v_bfe_u32 v5, v2, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v2 -; GFX940-NEXT: v_add3_u32 v5, v5, v2, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_and_or_b32 v2, v3, s3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2046 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB43_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2046 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: s_mov_b32 s3, 0xffff0000 +; GFX942-NEXT: .LBB43_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; GFX942-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX942-NEXT: v_bfe_u32 v5, v2, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v2 +; GFX942-NEXT: v_add3_u32 v5, v5, v2, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_and_or_b32 v2, v3, s3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2046 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB43_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -10909,49 +10909,49 @@ define bfloat @global_system_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB44_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_min_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v5, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB44_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB44_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_min_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v5, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB44_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -11319,48 +11319,48 @@ define void @global_system_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v4, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v6, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB45_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_min_f32_e32 v2, v2, v6 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v2, v3, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB45_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v4, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v6, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB45_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_min_f32_e32 v2, v2, v6 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB45_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -11699,31 +11699,31 @@ define <2 x half> @global_agent_atomic_fmin_ret_v2f16__amdgpu_no_fine_grained_me ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_ret_v2f16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX940-NEXT: .LBB46_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_pk_max_f16 v3, v5, v5 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_min_f16 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB46_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_ret_v2f16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX942-NEXT: .LBB46_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_pk_max_f16 v3, v5, v5 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_min_f16 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB46_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_ret_v2f16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -11991,31 +11991,31 @@ define <2 x half> @global_agent_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no_ ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX940-NEXT: .LBB47_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_pk_max_f16 v3, v5, v5 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_min_f16 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB47_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX942-NEXT: .LBB47_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_pk_max_f16 v3, v5, v5 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_min_f16 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB47_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -12285,31 +12285,31 @@ define <2 x half> @global_agent_atomic_fmin_ret_v2f16__offset12b_neg__amdgpu_no_ ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_ret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:-2048 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX940-NEXT: .LBB48_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_pk_max_f16 v3, v5, v5 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_min_f16 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:-2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB48_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_ret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:-2048 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX942-NEXT: .LBB48_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_pk_max_f16 v3, v5, v5 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_min_f16 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:-2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB48_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_ret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -12582,30 +12582,30 @@ define void @global_agent_atomic_fmin_noret_v2f16__amdgpu_no_fine_grained_memory ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_noret_v2f16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v4, v2, v2 -; GFX940-NEXT: .LBB49_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v2, v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_min_f16 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB49_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_noret_v2f16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v4, v2, v2 +; GFX942-NEXT: .LBB49_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v2, v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_min_f16 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB49_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_noret_v2f16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -12863,30 +12863,30 @@ define void @global_agent_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fine ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v4, v2, v2 -; GFX940-NEXT: .LBB50_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v2, v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_min_f16 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB50_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v4, v2, v2 +; GFX942-NEXT: .LBB50_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v2, v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_min_f16 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB50_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -13147,30 +13147,30 @@ define void @global_agent_atomic_fmin_noret_v2f16__offset12b_neg__amdgpu_no_fine ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_noret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:-2048 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v4, v2, v2 -; GFX940-NEXT: .LBB51_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v2, v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_min_f16 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB51_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_noret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:-2048 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v4, v2, v2 +; GFX942-NEXT: .LBB51_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v2, v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_min_f16 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB51_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_noret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -13441,31 +13441,31 @@ define <2 x half> @global_system_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX940-NEXT: .LBB52_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_pk_max_f16 v3, v5, v5 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_min_f16 v4, v3, v2 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB52_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX942-NEXT: .LBB52_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_pk_max_f16 v3, v5, v5 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_min_f16 v4, v3, v2 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB52_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -13737,30 +13737,30 @@ define void @global_system_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fin ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v4, v2, v2 -; GFX940-NEXT: .LBB53_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v2, v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_min_f16 v2, v2, v4 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB53_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v4, v2, v2 +; GFX942-NEXT: .LBB53_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v2, v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_min_f16 v2, v2, v4 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB53_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -14047,47 +14047,47 @@ define <2 x bfloat> @global_agent_atomic_fmin_ret_v2bf16__amdgpu_no_fine_grained ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_ret_v2bf16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB54_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v7 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 -; GFX940-NEXT: v_min_f32_e32 v3, v3, v4 -; GFX940-NEXT: v_min_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v3, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v3 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v3, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v5, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v6, v5, v3, s5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[6:7], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB54_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_ret_v2bf16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB54_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v7 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 +; GFX942-NEXT: v_min_f32_e32 v3, v3, v4 +; GFX942-NEXT: v_min_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v3, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v3 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v3, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v5, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v6, v5, v3, s5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[6:7], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB54_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_ret_v2bf16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -14446,47 +14446,47 @@ define <2 x bfloat> @global_agent_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_ ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB55_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v7 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 -; GFX940-NEXT: v_min_f32_e32 v3, v3, v4 -; GFX940-NEXT: v_min_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v3, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v3 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v3, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v5, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v6, v5, v3, s5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[6:7], off offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB55_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB55_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v7 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 +; GFX942-NEXT: v_min_f32_e32 v3, v3, v4 +; GFX942-NEXT: v_min_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v3, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v3 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v3, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v5, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v6, v5, v3, s5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[6:7], off offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB55_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -14847,47 +14847,47 @@ define <2 x bfloat> @global_agent_atomic_fmin_ret_v2bf16__offset12b_neg__amdgpu_ ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:-2048 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB56_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v7 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 -; GFX940-NEXT: v_min_f32_e32 v3, v3, v4 -; GFX940-NEXT: v_min_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v3, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v3 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v3, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v5, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v6, v5, v3, s5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[6:7], off offset:-2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB56_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:-2048 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB56_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v7 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 +; GFX942-NEXT: v_min_f32_e32 v3, v3, v4 +; GFX942-NEXT: v_min_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v3, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v3 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v3, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v5, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v6, v5, v3, s5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[6:7], off offset:-2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB56_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -15251,46 +15251,46 @@ define void @global_agent_atomic_fmin_noret_v2bf16__amdgpu_no_fine_grained_memor ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_noret_v2bf16__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB57_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v3 -; GFX940-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 -; GFX940-NEXT: v_min_f32_e32 v2, v2, v4 -; GFX940-NEXT: v_min_f32_e32 v6, v6, v5 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v6, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v6 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v6, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v2, v6, v2, s5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB57_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_noret_v2bf16__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB57_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; GFX942-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 +; GFX942-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX942-NEXT: v_min_f32_e32 v6, v6, v5 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v6, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v6 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v6, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB57_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_noret_v2bf16__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -15638,46 +15638,46 @@ define void @global_agent_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fin ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB58_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v3 -; GFX940-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 -; GFX940-NEXT: v_min_f32_e32 v2, v2, v4 -; GFX940-NEXT: v_min_f32_e32 v6, v6, v5 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v6, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v6 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v6, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v2, v6, v2, s5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB58_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB58_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; GFX942-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 +; GFX942-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX942-NEXT: v_min_f32_e32 v6, v6, v5 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v6, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v6 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v6, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB58_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -16028,46 +16028,46 @@ define void @global_agent_atomic_fmin_noret_v2bf16__offset12b_neg__amdgpu_no_fin ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fmin_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:-2048 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB59_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v3 -; GFX940-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 -; GFX940-NEXT: v_min_f32_e32 v2, v2, v4 -; GFX940-NEXT: v_min_f32_e32 v6, v6, v5 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v6, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v6 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v6, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v2, v6, v2, s5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB59_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fmin_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:-2048 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB59_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; GFX942-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 +; GFX942-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX942-NEXT: v_min_f32_e32 v6, v6, v5 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v6, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v6 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v6, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB59_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fmin_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -16428,47 +16428,47 @@ define <2 x bfloat> @global_system_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB60_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v7 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 -; GFX940-NEXT: v_min_f32_e32 v3, v3, v4 -; GFX940-NEXT: v_min_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v3, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v3 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v3, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v5, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v6, v5, v3, s5 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[6:7], off offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB60_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB60_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v7 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 +; GFX942-NEXT: v_min_f32_e32 v3, v3, v4 +; GFX942-NEXT: v_min_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v3, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v3 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v3, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v5, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v6, v5, v3, s5 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[6:7], off offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB60_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: @@ -16831,46 +16831,46 @@ define void @global_system_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fi ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB61_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v3 -; GFX940-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 -; GFX940-NEXT: v_min_f32_e32 v2, v2, v4 -; GFX940-NEXT: v_min_f32_e32 v6, v6, v5 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v6, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v6 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v6, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v2, v6, v2, s5 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB61_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB61_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; GFX942-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 +; GFX942-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX942-NEXT: v_min_f32_e32 v6, v6, v5 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v6, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v6 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v6, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB61_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/global-atomicrmw-fsub.ll b/llvm/test/CodeGen/AMDGPU/global-atomicrmw-fsub.ll index 5577029f502d0..c131921c83fff 100644 --- a/llvm/test/CodeGen/AMDGPU/global-atomicrmw-fsub.ll +++ b/llvm/test/CodeGen/AMDGPU/global-atomicrmw-fsub.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s -; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s @@ -44,28 +44,28 @@ define float @global_agent_atomic_fsub_ret_f32(ptr addrspace(1) %ptr, float %val ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_ret_f32: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB0_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_sub_f32_e32 v4, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB0_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_ret_f32: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB0_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB0_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_ret_f32: ; GFX11: ; %bb.0: @@ -275,28 +275,28 @@ define float @global_agent_atomic_fsub_ret_f32__offset12b_pos(ptr addrspace(1) % ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_ret_f32__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB1_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_sub_f32_e32 v4, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB1_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_ret_f32__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB1_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB1_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_ret_f32__offset12b_pos: ; GFX11: ; %bb.0: @@ -508,28 +508,28 @@ define float @global_agent_atomic_fsub_ret_f32__offset12b_neg(ptr addrspace(1) % ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_ret_f32__offset12b_neg: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:-2048 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB2_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_sub_f32_e32 v4, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:-2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB2_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_ret_f32__offset12b_neg: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:-2048 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB2_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:-2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB2_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_ret_f32__offset12b_neg: ; GFX11: ; %bb.0: @@ -749,27 +749,27 @@ define void @global_agent_atomic_fsub_noret_f32(ptr addrspace(1) %ptr, float %va ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_noret_f32: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB3_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_sub_f32_e32 v4, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB3_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_noret_f32: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB3_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB3_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_noret_f32: ; GFX11: ; %bb.0: @@ -969,27 +969,27 @@ define void @global_agent_atomic_fsub_noret_f32__offset12b_pos(ptr addrspace(1) ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_noret_f32__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v5, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB4_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_sub_f32_e32 v4, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB4_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_noret_f32__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v5, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB4_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB4_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_noret_f32__offset12b_pos: ; GFX11: ; %bb.0: @@ -1192,27 +1192,27 @@ define void @global_agent_atomic_fsub_noret_f32__offset12b_neg(ptr addrspace(1) ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_noret_f32__offset12b_neg: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v5, v[0:1], off offset:-2048 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB5_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_sub_f32_e32 v4, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:-2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB5_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_noret_f32__offset12b_neg: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v5, v[0:1], off offset:-2048 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB5_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:-2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB5_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_noret_f32__offset12b_neg: ; GFX11: ; %bb.0: @@ -1426,28 +1426,28 @@ define float @global_system_atomic_fsub_ret_f32__offset12b_pos(ptr addrspace(1) ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fsub_ret_f32__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB6_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_sub_f32_e32 v4, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB6_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fsub_ret_f32__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB6_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB6_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fsub_ret_f32__offset12b_pos: ; GFX11: ; %bb.0: @@ -1660,27 +1660,27 @@ define void @global_system_atomic_fsub_noret_f32__offset12b_pos(ptr addrspace(1) ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fsub_noret_f32__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v5, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB7_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_sub_f32_e32 v4, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB7_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fsub_noret_f32__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v5, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB7_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB7_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fsub_noret_f32__offset12b_pos: ; GFX11: ; %bb.0: @@ -1891,28 +1891,28 @@ define float @global_agent_atomic_fsub_ret_f32__ftz(ptr addrspace(1) %ptr, float ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_ret_f32__ftz: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB8_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_sub_f32_e32 v4, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB8_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_ret_f32__ftz: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB8_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB8_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_ret_f32__ftz: ; GFX11: ; %bb.0: @@ -2122,28 +2122,28 @@ define float @global_agent_atomic_fsub_ret_f32__offset12b_pos__ftz(ptr addrspace ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_ret_f32__offset12b_pos__ftz: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB9_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_sub_f32_e32 v4, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB9_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_ret_f32__offset12b_pos__ftz: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB9_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB9_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_ret_f32__offset12b_pos__ftz: ; GFX11: ; %bb.0: @@ -2355,28 +2355,28 @@ define float @global_agent_atomic_fsub_ret_f32__offset12b_neg__ftz(ptr addrspace ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_ret_f32__offset12b_neg__ftz: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:-2048 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB10_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_sub_f32_e32 v4, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:-2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB10_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_ret_f32__offset12b_neg__ftz: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:-2048 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB10_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:-2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB10_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_ret_f32__offset12b_neg__ftz: ; GFX11: ; %bb.0: @@ -2596,27 +2596,27 @@ define void @global_agent_atomic_fsub_noret_f32__ftz(ptr addrspace(1) %ptr, floa ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_noret_f32__ftz: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB11_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_sub_f32_e32 v4, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB11_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_noret_f32__ftz: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB11_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB11_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_noret_f32__ftz: ; GFX11: ; %bb.0: @@ -2816,27 +2816,27 @@ define void @global_agent_atomic_fsub_noret_f32__offset12b_pos__ftz(ptr addrspac ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_noret_f32__offset12b_pos__ftz: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v5, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB12_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_sub_f32_e32 v4, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB12_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_noret_f32__offset12b_pos__ftz: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v5, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB12_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB12_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_noret_f32__offset12b_pos__ftz: ; GFX11: ; %bb.0: @@ -3039,27 +3039,27 @@ define void @global_agent_atomic_fsub_noret_f32__offset12b_neg__ftz(ptr addrspac ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_noret_f32__offset12b_neg__ftz: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v5, v[0:1], off offset:-2048 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB13_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_sub_f32_e32 v4, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:-2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB13_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_noret_f32__offset12b_neg__ftz: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v5, v[0:1], off offset:-2048 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB13_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:-2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB13_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_noret_f32__offset12b_neg__ftz: ; GFX11: ; %bb.0: @@ -3273,28 +3273,28 @@ define float @global_system_atomic_fsub_ret_f32__offset12b_pos__ftz(ptr addrspac ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fsub_ret_f32__offset12b_pos__ftz: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB14_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_sub_f32_e32 v4, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB14_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fsub_ret_f32__offset12b_pos__ftz: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB14_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB14_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fsub_ret_f32__offset12b_pos__ftz: ; GFX11: ; %bb.0: @@ -3507,27 +3507,27 @@ define void @global_system_atomic_fsub_noret_f32__offset12b_pos__ftz(ptr addrspa ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fsub_noret_f32__offset12b_pos__ftz: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v5, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB15_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_sub_f32_e32 v4, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB15_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fsub_noret_f32__offset12b_pos__ftz: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v5, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB15_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_sub_f32_e32 v4, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB15_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fsub_noret_f32__offset12b_pos__ftz: ; GFX11: ; %bb.0: @@ -3738,29 +3738,29 @@ define double @global_agent_atomic_fsub_ret_f64(ptr addrspace(1) %ptr, double %v ; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_ret_f64: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[4:5], v[0:1], off -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB16_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[6:7], v[4:5] -; GFX940-NEXT: v_add_f64 v[4:5], v[6:7], -v[2:3] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB16_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_ret_f64: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx2 v[4:5], v[0:1], off +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB16_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[4:5] +; GFX942-NEXT: v_add_f64 v[4:5], v[6:7], -v[2:3] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB16_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_ret_f64: ; GFX11: ; %bb.0: @@ -3989,29 +3989,29 @@ define double @global_agent_atomic_fsub_ret_f64__offset12b_pos(ptr addrspace(1) ; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_ret_f64__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[4:5], v[0:1], off offset:2040 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB17_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[6:7], v[4:5] -; GFX940-NEXT: v_add_f64 v[4:5], v[6:7], -v[2:3] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off offset:2040 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB17_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_ret_f64__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx2 v[4:5], v[0:1], off offset:2040 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB17_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[4:5] +; GFX942-NEXT: v_add_f64 v[4:5], v[6:7], -v[2:3] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off offset:2040 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB17_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_ret_f64__offset12b_pos: ; GFX11: ; %bb.0: @@ -4241,29 +4241,29 @@ define double @global_agent_atomic_fsub_ret_f64__offset12b_neg(ptr addrspace(1) ; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_ret_f64__offset12b_neg: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[4:5], v[0:1], off offset:-2048 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB18_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[6:7], v[4:5] -; GFX940-NEXT: v_add_f64 v[4:5], v[6:7], -v[2:3] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off offset:-2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB18_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_ret_f64__offset12b_neg: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx2 v[4:5], v[0:1], off offset:-2048 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB18_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[4:5] +; GFX942-NEXT: v_add_f64 v[4:5], v[6:7], -v[2:3] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off offset:-2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB18_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_ret_f64__offset12b_neg: ; GFX11: ; %bb.0: @@ -4499,27 +4499,27 @@ define void @global_agent_atomic_fsub_noret_f64(ptr addrspace(1) %ptr, double %v ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_noret_f64: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[6:7], v[0:1], off -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB19_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_add_f64 v[4:5], v[6:7], -v[2:3] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b64_e32 v[6:7], v[4:5] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB19_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_noret_f64: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx2 v[6:7], v[0:1], off +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB19_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_add_f64 v[4:5], v[6:7], -v[2:3] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[4:5] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB19_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_noret_f64: ; GFX11: ; %bb.0: @@ -4728,27 +4728,27 @@ define void @global_agent_atomic_fsub_noret_f64__offset12b_pos(ptr addrspace(1) ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_noret_f64__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[6:7], v[0:1], off offset:2040 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB20_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_add_f64 v[4:5], v[6:7], -v[2:3] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off offset:2040 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b64_e32 v[6:7], v[4:5] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB20_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_noret_f64__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx2 v[6:7], v[0:1], off offset:2040 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB20_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_add_f64 v[4:5], v[6:7], -v[2:3] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off offset:2040 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[4:5] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB20_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_noret_f64__offset12b_pos: ; GFX11: ; %bb.0: @@ -4960,27 +4960,27 @@ define void @global_agent_atomic_fsub_noret_f64__offset12b_neg(ptr addrspace(1) ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_noret_f64__offset12b_neg: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[6:7], v[0:1], off offset:-2048 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB21_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_add_f64 v[4:5], v[6:7], -v[2:3] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off offset:-2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b64_e32 v[6:7], v[4:5] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB21_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_noret_f64__offset12b_neg: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx2 v[6:7], v[0:1], off offset:-2048 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB21_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_add_f64 v[4:5], v[6:7], -v[2:3] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off offset:-2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[4:5] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB21_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_noret_f64__offset12b_neg: ; GFX11: ; %bb.0: @@ -5220,38 +5220,38 @@ define half @global_agent_atomic_fsub_ret_f16(ptr addrspace(1) %ptr, half %val) ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_ret_f16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v3 -; GFX940-NEXT: global_load_dword v4, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB22_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v4 -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v3, v7 -; GFX940-NEXT: v_sub_f16_e32 v4, v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v3, v4 -; GFX940-NEXT: v_and_or_b32 v6, v7, v5, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v4, v[0:1], v[6:7], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB22_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_ret_f16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v3 +; GFX942-NEXT: global_load_dword v4, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB22_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v4 +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v3, v7 +; GFX942-NEXT: v_sub_f16_e32 v4, v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4 +; GFX942-NEXT: v_and_or_b32 v6, v7, v5, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[6:7], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB22_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_ret_f16: ; GFX11: ; %bb.0: @@ -5560,40 +5560,40 @@ define half @global_agent_atomic_fsub_ret_f16__offset12b_pos(ptr addrspace(1) %p ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_ret_f16__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_load_dword v4, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v6 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB23_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v4 -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v3, v7 -; GFX940-NEXT: v_sub_f16_e32 v4, v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v3, v4 -; GFX940-NEXT: v_and_or_b32 v6, v7, v5, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v4, v[0:1], v[6:7], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB23_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_ret_f16__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_load_dword v4, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v6 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB23_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v4 +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v3, v7 +; GFX942-NEXT: v_sub_f16_e32 v4, v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4 +; GFX942-NEXT: v_and_or_b32 v6, v7, v5, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[6:7], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB23_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_ret_f16__offset12b_pos: ; GFX11: ; %bb.0: @@ -5911,41 +5911,41 @@ define half @global_agent_atomic_fsub_ret_f16__offset12b_neg(ptr addrspace(1) %p ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_ret_f16__offset12b_neg: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_load_dword v4, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v6 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB24_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v4 -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v3, v7 -; GFX940-NEXT: v_sub_f16_e32 v4, v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v3, v4 -; GFX940-NEXT: v_and_or_b32 v6, v7, v5, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v4, v[0:1], v[6:7], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB24_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_ret_f16__offset12b_neg: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_load_dword v4, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v6 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB24_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v4 +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v3, v7 +; GFX942-NEXT: v_sub_f16_e32 v4, v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4 +; GFX942-NEXT: v_and_or_b32 v6, v7, v5, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[6:7], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB24_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_ret_f16__offset12b_neg: ; GFX11: ; %bb.0: @@ -6259,37 +6259,37 @@ define void @global_agent_atomic_fsub_noret_f16(ptr addrspace(1) %ptr, half %val ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_noret_f16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v3 -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v6, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB25_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v3, v5 -; GFX940-NEXT: v_sub_f16_e32 v4, v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v3, v4 -; GFX940-NEXT: v_and_or_b32 v4, v5, v6, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB25_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_noret_f16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v3 +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v6, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB25_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v3, v5 +; GFX942-NEXT: v_sub_f16_e32 v4, v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4 +; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB25_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_noret_f16: ; GFX11: ; %bb.0: @@ -6587,39 +6587,39 @@ define void @global_agent_atomic_fsub_noret_f16__offset12b_pos(ptr addrspace(1) ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_noret_f16__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v6, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB26_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v3, v5 -; GFX940-NEXT: v_sub_f16_e32 v4, v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v3, v4 -; GFX940-NEXT: v_and_or_b32 v4, v5, v6, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB26_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_noret_f16__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v6, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB26_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v3, v5 +; GFX942-NEXT: v_sub_f16_e32 v4, v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4 +; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB26_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_noret_f16__offset12b_pos: ; GFX11: ; %bb.0: @@ -6925,40 +6925,40 @@ define void @global_agent_atomic_fsub_noret_f16__offset12b_neg(ptr addrspace(1) ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_noret_f16__offset12b_neg: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v6, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB27_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v3, v5 -; GFX940-NEXT: v_sub_f16_e32 v4, v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v3, v4 -; GFX940-NEXT: v_and_or_b32 v4, v5, v6, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB27_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_noret_f16__offset12b_neg: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v6, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB27_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v3, v5 +; GFX942-NEXT: v_sub_f16_e32 v4, v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4 +; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB27_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_noret_f16__offset12b_neg: ; GFX11: ; %bb.0: @@ -7253,30 +7253,30 @@ define half @global_agent_atomic_fsub_ret_f16__offset12b_pos__align4(ptr addrspa ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_ret_f16__offset12b_pos__align4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2046 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_mov_b32 s2, 0xffff0000 -; GFX940-NEXT: .LBB28_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_sub_f16_e32 v3, v5, v2 -; GFX940-NEXT: v_and_or_b32 v4, v5, s2, v3 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2046 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB28_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_ret_f16__offset12b_pos__align4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2046 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_mov_b32 s2, 0xffff0000 +; GFX942-NEXT: .LBB28_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_sub_f16_e32 v3, v5, v2 +; GFX942-NEXT: v_and_or_b32 v4, v5, s2, v3 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2046 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB28_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_ret_f16__offset12b_pos__align4: ; GFX11: ; %bb.0: @@ -7512,29 +7512,29 @@ define void @global_agent_atomic_fsub_noret_f16__offset12b__align4_pos(ptr addrs ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_noret_f16__offset12b__align4_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v5, v[0:1], off offset:2046 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_mov_b32 s2, 0xffff0000 -; GFX940-NEXT: .LBB29_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_sub_f16_e32 v3, v5, v2 -; GFX940-NEXT: v_and_or_b32 v4, v5, s2, v3 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2046 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB29_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_noret_f16__offset12b__align4_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v5, v[0:1], off offset:2046 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_mov_b32 s2, 0xffff0000 +; GFX942-NEXT: .LBB29_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_sub_f16_e32 v3, v5, v2 +; GFX942-NEXT: v_and_or_b32 v4, v5, s2, v3 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2046 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB29_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_noret_f16__offset12b__align4_pos: ; GFX11: ; %bb.0: @@ -7779,40 +7779,40 @@ define half @global_system_atomic_fsub_ret_f16__offset12b_pos(ptr addrspace(1) % ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fsub_ret_f16__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_load_dword v4, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v6 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB30_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v4 -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v3, v7 -; GFX940-NEXT: v_sub_f16_e32 v4, v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v3, v4 -; GFX940-NEXT: v_and_or_b32 v6, v7, v5, v4 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v4, v[0:1], v[6:7], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB30_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fsub_ret_f16__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[6:7], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_load_dword v4, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v6 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB30_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v4 +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v3, v7 +; GFX942-NEXT: v_sub_f16_e32 v4, v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4 +; GFX942-NEXT: v_and_or_b32 v6, v7, v5, v4 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[6:7], off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB30_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fsub_ret_f16__offset12b_pos: ; GFX11: ; %bb.0: @@ -8131,39 +8131,39 @@ define void @global_system_atomic_fsub_noret_f16__offset12b_pos(ptr addrspace(1) ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fsub_noret_f16__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v6, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB31_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v3, v5 -; GFX940-NEXT: v_sub_f16_e32 v4, v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v3, v4 -; GFX940-NEXT: v_and_or_b32 v4, v5, v6, v4 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB31_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fsub_noret_f16__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v6, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB31_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v3, v5 +; GFX942-NEXT: v_sub_f16_e32 v4, v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v3, v4 +; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB31_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fsub_noret_f16__offset12b_pos: ; GFX11: ; %bb.0: @@ -8484,47 +8484,47 @@ define bfloat @global_agent_atomic_fsub_ret_bf16(ptr addrspace(1) %ptr, bfloat % ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_ret_bf16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v3 -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB32_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_sub_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v5, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB32_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_ret_bf16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v3 +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB32_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_sub_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v5, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB32_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_ret_bf16: ; GFX11: ; %bb.0: @@ -8879,49 +8879,49 @@ define bfloat @global_agent_atomic_fsub_ret_bf16__offset12b_pos(ptr addrspace(1) ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_ret_bf16__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB33_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_sub_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v5, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB33_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_ret_bf16__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB33_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_sub_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v5, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB33_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_ret_bf16__offset12b_pos: ; GFX11: ; %bb.0: @@ -9286,50 +9286,50 @@ define bfloat @global_agent_atomic_fsub_ret_bf16__offset12b_neg(ptr addrspace(1) ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_ret_bf16__offset12b_neg: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB34_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_sub_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v5, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB34_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_ret_bf16__offset12b_neg: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB34_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_sub_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v5, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB34_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_ret_bf16__offset12b_neg: ; GFX11: ; %bb.0: @@ -9689,46 +9689,46 @@ define void @global_agent_atomic_fsub_noret_bf16(ptr addrspace(1) %ptr, bfloat % ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_noret_bf16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v3 -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v6, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB35_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v4, v3, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_sub_f32_e32 v4, v4, v2 -; GFX940-NEXT: v_bfe_u32 v7, v4, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v4 -; GFX940-NEXT: v_add3_u32 v7, v7, v4, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v4, v7, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v4, v5, v6, v4 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB35_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_noret_bf16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v3 +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v6, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB35_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v4, v3, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_sub_f32_e32 v4, v4, v2 +; GFX942-NEXT: v_bfe_u32 v7, v4, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v4 +; GFX942-NEXT: v_add3_u32 v7, v7, v4, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v4, v7, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v4, v5, v6, v4 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v4, v[0:1], v[4:5], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB35_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_noret_bf16: ; GFX11: ; %bb.0: @@ -10072,48 +10072,48 @@ define void @global_agent_atomic_fsub_noret_bf16__offset12b_pos(ptr addrspace(1) ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_noret_bf16__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v4, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v6, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB36_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_sub_f32_e32 v2, v2, v6 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v2, v3, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB36_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_noret_bf16__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v4, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v6, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB36_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_sub_f32_e32 v2, v2, v6 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB36_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_noret_bf16__offset12b_pos: ; GFX11: ; %bb.0: @@ -10466,49 +10466,49 @@ define void @global_agent_atomic_fsub_noret_bf16__offset12b_neg(ptr addrspace(1) ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_noret_bf16__offset12b_neg: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_movk_i32 s0, 0xf800 -; GFX940-NEXT: s_mov_b32 s1, -1 -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v4, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v6, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB37_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_sub_f32_e32 v2, v2, v6 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v2, v3, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB37_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_noret_bf16__offset12b_neg: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0xf800 +; GFX942-NEXT: s_mov_b32 s1, -1 +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v4, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v6, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB37_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_sub_f32_e32 v2, v2, v6 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB37_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_noret_bf16__offset12b_neg: ; GFX11: ; %bb.0: @@ -10850,40 +10850,40 @@ define bfloat @global_agent_atomic_fsub_ret_bf16__offset12b_pos__align4(ptr addr ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_ret_bf16__offset12b_pos__align4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2046 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: s_mov_b32 s3, 0xffff0000 -; GFX940-NEXT: .LBB38_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v5 -; GFX940-NEXT: v_sub_f32_e32 v3, v3, v2 -; GFX940-NEXT: v_bfe_u32 v4, v3, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v3 -; GFX940-NEXT: v_add3_u32 v4, v4, v3, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v4, v6, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3 -; GFX940-NEXT: v_and_or_b32 v4, v5, s3, v3 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2046 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB38_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_ret_bf16__offset12b_pos__align4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2046 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: s_mov_b32 s3, 0xffff0000 +; GFX942-NEXT: .LBB38_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v5 +; GFX942-NEXT: v_sub_f32_e32 v3, v3, v2 +; GFX942-NEXT: v_bfe_u32 v4, v3, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v3 +; GFX942-NEXT: v_add3_u32 v4, v4, v3, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v4, v6, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX942-NEXT: v_and_or_b32 v4, v5, s3, v3 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2046 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB38_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_ret_bf16__offset12b_pos__align4: ; GFX11: ; %bb.0: @@ -11172,39 +11172,39 @@ define void @global_agent_atomic_fsub_noret_bf16__offset12b__align4_pos(ptr addr ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_noret_bf16__offset12b__align4_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2046 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: s_mov_b32 s3, 0xffff0000 -; GFX940-NEXT: .LBB39_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v3 -; GFX940-NEXT: v_sub_f32_e32 v2, v2, v4 -; GFX940-NEXT: v_bfe_u32 v5, v2, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v2 -; GFX940-NEXT: v_add3_u32 v5, v5, v2, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_and_or_b32 v2, v3, s3, v2 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2046 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB39_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_noret_bf16__offset12b__align4_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2046 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: s_mov_b32 s3, 0xffff0000 +; GFX942-NEXT: .LBB39_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; GFX942-NEXT: v_sub_f32_e32 v2, v2, v4 +; GFX942-NEXT: v_bfe_u32 v5, v2, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v2 +; GFX942-NEXT: v_add3_u32 v5, v5, v2, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_and_or_b32 v2, v3, s3, v2 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2046 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB39_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_noret_bf16__offset12b__align4_pos: ; GFX11: ; %bb.0: @@ -11502,49 +11502,49 @@ define bfloat @global_system_atomic_fsub_ret_bf16__offset12b_pos(ptr addrspace(1 ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fsub_ret_bf16__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v3, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v4, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v4, v4 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB40_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_sub_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v5, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v6, v7, v4, v5 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB40_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v3, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fsub_ret_bf16__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v3, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v4, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v4, v4 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB40_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_lshrrev_b32_sdwa v5, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_sub_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v5, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v5, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v5, v[0:1], v[6:7], off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB40_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fsub_ret_bf16__offset12b_pos: ; GFX11: ; %bb.0: @@ -11910,48 +11910,48 @@ define void @global_system_atomic_fsub_noret_bf16__offset12b_pos(ptr addrspace(1 ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fsub_noret_bf16__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b64 s[0:1], 0x7fe -; GFX940-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] -; GFX940-NEXT: v_and_b32_e32 v0, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 3, v4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v5, v4, s0 -; GFX940-NEXT: v_not_b32_e32 v5, v5 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v6, 16, v2 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB41_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_sub_f32_e32 v2, v2, v6 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v2, v3, v5, v2 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB41_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fsub_noret_bf16__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b64 s[0:1], 0x7fe +; GFX942-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX942-NEXT: v_and_b32_e32 v0, -4, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v5, v4, s0 +; GFX942-NEXT: v_not_b32_e32 v5, v5 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v6, 16, v2 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB41_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_sub_f32_e32 v2, v2, v6 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v7, v8, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v2, v3, v5, v2 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB41_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fsub_noret_bf16__offset12b_pos: ; GFX11: ; %bb.0: @@ -12286,28 +12286,28 @@ define <2 x half> @global_agent_atomic_fsub_ret_v2f16(ptr addrspace(1) %ptr, <2 ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_ret_v2f16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB42_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB42_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_ret_v2f16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB42_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB42_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_ret_v2f16: ; GFX11: ; %bb.0: @@ -12563,28 +12563,28 @@ define <2 x half> @global_agent_atomic_fsub_ret_v2f16__offset12b_pos(ptr addrspa ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_ret_v2f16__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB43_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB43_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_ret_v2f16__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB43_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB43_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_ret_v2f16__offset12b_pos: ; GFX11: ; %bb.0: @@ -12842,28 +12842,28 @@ define <2 x half> @global_agent_atomic_fsub_ret_v2f16__offset12b_neg(ptr addrspa ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_ret_v2f16__offset12b_neg: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:-2048 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB44_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:-2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB44_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_ret_v2f16__offset12b_neg: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:-2048 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB44_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:-2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB44_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_ret_v2f16__offset12b_neg: ; GFX11: ; %bb.0: @@ -13123,27 +13123,27 @@ define void @global_agent_atomic_fsub_noret_v2f16(ptr addrspace(1) %ptr, <2 x ha ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_noret_v2f16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB45_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB45_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_noret_v2f16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB45_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB45_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_noret_v2f16: ; GFX11: ; %bb.0: @@ -13387,27 +13387,27 @@ define void @global_agent_atomic_fsub_noret_v2f16__offset12b_pos(ptr addrspace(1 ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_noret_v2f16__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v5, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB46_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB46_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_noret_v2f16__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v5, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB46_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB46_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_noret_v2f16__offset12b_pos: ; GFX11: ; %bb.0: @@ -13654,27 +13654,27 @@ define void @global_agent_atomic_fsub_noret_v2f16__offset12b_neg(ptr addrspace(1 ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_noret_v2f16__offset12b_neg: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v5, v[0:1], off offset:-2048 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB47_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1] -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:-2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB47_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_noret_v2f16__offset12b_neg: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v5, v[0:1], off offset:-2048 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB47_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1] +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:-2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB47_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_noret_v2f16__offset12b_neg: ; GFX11: ; %bb.0: @@ -13932,28 +13932,28 @@ define <2 x half> @global_system_atomic_fsub_ret_v2f16__offset12b_pos(ptr addrsp ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fsub_ret_v2f16__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB48_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1] -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB48_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fsub_ret_v2f16__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB48_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1] +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB48_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fsub_ret_v2f16__offset12b_pos: ; GFX11: ; %bb.0: @@ -14212,27 +14212,27 @@ define void @global_system_atomic_fsub_noret_v2f16__offset12b_pos(ptr addrspace( ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fsub_noret_v2f16__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v5, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB49_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1] -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB49_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fsub_noret_v2f16__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v5, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB49_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_pk_add_f16 v4, v5, v2 neg_lo:[0,1] neg_hi:[0,1] +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB49_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fsub_noret_v2f16__offset12b_pos: ; GFX11: ; %bb.0: @@ -14508,47 +14508,47 @@ define <2 x bfloat> @global_agent_atomic_fsub_ret_v2bf16(ptr addrspace(1) %ptr, ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_ret_v2bf16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB50_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v7 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 -; GFX940-NEXT: v_sub_f32_e32 v3, v3, v4 -; GFX940-NEXT: v_sub_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v3, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v3 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v3, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v5, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v6, v5, v3, s5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[6:7], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB50_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_ret_v2bf16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB50_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v7 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 +; GFX942-NEXT: v_sub_f32_e32 v3, v3, v4 +; GFX942-NEXT: v_sub_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v3, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v3 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v3, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v5, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v6, v5, v3, s5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[6:7], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB50_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_ret_v2bf16: ; GFX11: ; %bb.0: @@ -14907,47 +14907,47 @@ define <2 x bfloat> @global_agent_atomic_fsub_ret_v2bf16__offset12b_pos(ptr addr ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_ret_v2bf16__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB51_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v7 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 -; GFX940-NEXT: v_sub_f32_e32 v3, v3, v4 -; GFX940-NEXT: v_sub_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v3, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v3 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v3, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v5, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v6, v5, v3, s5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[6:7], off offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB51_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_ret_v2bf16__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB51_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v7 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 +; GFX942-NEXT: v_sub_f32_e32 v3, v3, v4 +; GFX942-NEXT: v_sub_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v3, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v3 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v3, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v5, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v6, v5, v3, s5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[6:7], off offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB51_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_ret_v2bf16__offset12b_pos: ; GFX11: ; %bb.0: @@ -15308,47 +15308,47 @@ define <2 x bfloat> @global_agent_atomic_fsub_ret_v2bf16__offset12b_neg(ptr addr ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_ret_v2bf16__offset12b_neg: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:-2048 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB52_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v7 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 -; GFX940-NEXT: v_sub_f32_e32 v3, v3, v4 -; GFX940-NEXT: v_sub_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v3, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v3 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v3, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v5, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v6, v5, v3, s5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[6:7], off offset:-2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB52_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_ret_v2bf16__offset12b_neg: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:-2048 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB52_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v7 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 +; GFX942-NEXT: v_sub_f32_e32 v3, v3, v4 +; GFX942-NEXT: v_sub_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v3, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v3 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v3, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v5, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v6, v5, v3, s5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[6:7], off offset:-2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB52_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_ret_v2bf16__offset12b_neg: ; GFX11: ; %bb.0: @@ -15712,46 +15712,46 @@ define void @global_agent_atomic_fsub_noret_v2bf16(ptr addrspace(1) %ptr, <2 x b ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_noret_v2bf16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB53_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v3 -; GFX940-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 -; GFX940-NEXT: v_sub_f32_e32 v2, v2, v4 -; GFX940-NEXT: v_sub_f32_e32 v6, v6, v5 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v6, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v6 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v6, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v2, v6, v2, s5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB53_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_noret_v2bf16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB53_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; GFX942-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 +; GFX942-NEXT: v_sub_f32_e32 v2, v2, v4 +; GFX942-NEXT: v_sub_f32_e32 v6, v6, v5 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v6, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v6 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v6, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB53_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_noret_v2bf16: ; GFX11: ; %bb.0: @@ -16099,46 +16099,46 @@ define void @global_agent_atomic_fsub_noret_v2bf16__offset12b_pos(ptr addrspace( ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_noret_v2bf16__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB54_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v3 -; GFX940-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 -; GFX940-NEXT: v_sub_f32_e32 v2, v2, v4 -; GFX940-NEXT: v_sub_f32_e32 v6, v6, v5 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v6, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v6 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v6, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v2, v6, v2, s5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB54_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_noret_v2bf16__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB54_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; GFX942-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 +; GFX942-NEXT: v_sub_f32_e32 v2, v2, v4 +; GFX942-NEXT: v_sub_f32_e32 v6, v6, v5 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v6, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v6 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v6, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB54_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_noret_v2bf16__offset12b_pos: ; GFX11: ; %bb.0: @@ -16489,46 +16489,46 @@ define void @global_agent_atomic_fsub_noret_v2bf16__offset12b_neg(ptr addrspace( ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_agent_atomic_fsub_noret_v2bf16__offset12b_neg: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:-2048 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB55_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v3 -; GFX940-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 -; GFX940-NEXT: v_sub_f32_e32 v2, v2, v4 -; GFX940-NEXT: v_sub_f32_e32 v6, v6, v5 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v6, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v6 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v6, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v2, v6, v2, s5 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB55_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_atomic_fsub_noret_v2bf16__offset12b_neg: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:-2048 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB55_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; GFX942-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 +; GFX942-NEXT: v_sub_f32_e32 v2, v2, v4 +; GFX942-NEXT: v_sub_f32_e32 v6, v6, v5 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v6, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v6 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v6, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:-2048 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB55_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_agent_atomic_fsub_noret_v2bf16__offset12b_neg: ; GFX11: ; %bb.0: @@ -16889,47 +16889,47 @@ define <2 x bfloat> @global_system_atomic_fsub_ret_v2bf16__offset12b_pos(ptr add ; GFX12-NEXT: v_mov_b32_e32 v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fsub_ret_v2bf16__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB56_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v7 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 -; GFX940-NEXT: v_sub_f32_e32 v3, v3, v4 -; GFX940-NEXT: v_sub_f32_e32 v5, v5, v2 -; GFX940-NEXT: v_bfe_u32 v6, v3, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v3 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v3, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v5, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v6, v5, v3, s5 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[6:7], off offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB56_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fsub_ret_v2bf16__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB56_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v7 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v7 +; GFX942-NEXT: v_sub_f32_e32 v3, v3, v4 +; GFX942-NEXT: v_sub_f32_e32 v5, v5, v2 +; GFX942-NEXT: v_bfe_u32 v6, v3, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v3 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v3, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v5, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v3, v6, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v6, v5, v3, s5 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v3, v[0:1], v[6:7], off offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB56_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fsub_ret_v2bf16__offset12b_pos: ; GFX11: ; %bb.0: @@ -17292,46 +17292,46 @@ define void @global_system_atomic_fsub_noret_v2bf16__offset12b_pos(ptr addrspace ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: global_system_atomic_fsub_noret_v2bf16__offset12b_pos: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:2044 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v2 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB57_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v3 -; GFX940-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 -; GFX940-NEXT: v_sub_f32_e32 v2, v2, v4 -; GFX940-NEXT: v_sub_f32_e32 v6, v6, v5 -; GFX940-NEXT: v_bfe_u32 v7, v2, 16, 1 -; GFX940-NEXT: v_bfe_u32 v9, v6, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v2 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v6 -; GFX940-NEXT: v_add3_u32 v7, v7, v2, s4 -; GFX940-NEXT: v_add3_u32 v9, v9, v6, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] -; GFX940-NEXT: v_perm_b32 v2, v6, v2, s5 -; GFX940-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc0 sc1 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB57_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_atomic_fsub_noret_v2bf16__offset12b_pos: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v3, v[0:1], off offset:2044 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v2 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v2 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB57_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; GFX942-NEXT: v_and_b32_e32 v6, 0xffff0000, v3 +; GFX942-NEXT: v_sub_f32_e32 v2, v2, v4 +; GFX942-NEXT: v_sub_f32_e32 v6, v6, v5 +; GFX942-NEXT: v_bfe_u32 v7, v2, 16, 1 +; GFX942-NEXT: v_bfe_u32 v9, v6, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v2 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v6 +; GFX942-NEXT: v_add3_u32 v7, v7, v2, s4 +; GFX942-NEXT: v_add3_u32 v9, v9, v6, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v6, v6 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v9, v10, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v2, v7, v8, s[0:1] +; GFX942-NEXT: v_perm_b32 v2, v6, v2, s5 +; GFX942-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NEXT: global_atomic_cmpswap v2, v[0:1], v[2:3], off offset:2044 sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc0 sc1 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB57_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: global_system_atomic_fsub_noret_v2bf16__offset12b_pos: ; GFX11: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/idemponent-atomics.ll b/llvm/test/CodeGen/AMDGPU/idemponent-atomics.ll index 5416053078ec4..28aa76ab12f37 100644 --- a/llvm/test/CodeGen/AMDGPU/idemponent-atomics.ll +++ b/llvm/test/CodeGen/AMDGPU/idemponent-atomics.ll @@ -1,14 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX942 %s ; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=atomic-expand < %s | FileCheck --check-prefix=OPT %s define i32 @global_agent_monotonic_idempotent_or(ptr addrspace(1) %in) { -; GFX940-LABEL: global_agent_monotonic_idempotent_or: -; GFX940: ; %bb.0: ; %entry -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v0, v[0:1], off sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_monotonic_idempotent_or: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v0, v[0:1], off sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; OPT-LABEL: @global_agent_monotonic_idempotent_or( ; OPT-NEXT: entry: ; OPT-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[IN:%.*]] syncscope("agent-one-as") monotonic, align 4 @@ -20,13 +20,13 @@ entry: } define i32 @global_agent_acquire_idempotent_or(ptr addrspace(1) %in) { -; GFX940-LABEL: global_agent_acquire_idempotent_or: -; GFX940: ; %bb.0: ; %entry -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v0, v[0:1], off sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_acquire_idempotent_or: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v0, v[0:1], off sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; OPT-LABEL: @global_agent_acquire_idempotent_or( ; OPT-NEXT: entry: ; OPT-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[IN:%.*]] syncscope("agent-one-as") acquire, align 4 @@ -38,14 +38,14 @@ entry: } define i32 @global_agent_release_idempotent_or(ptr addrspace(1) %in) { -; GFX940-LABEL: global_agent_release_idempotent_or: -; GFX940: ; %bb.0: ; %entry -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_or v0, v[0:1], v2, off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_release_idempotent_or: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_or v0, v[0:1], v2, off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; OPT-LABEL: @global_agent_release_idempotent_or( ; OPT-NEXT: entry: ; OPT-NEXT: [[VAL:%.*]] = atomicrmw or ptr addrspace(1) [[IN:%.*]], i32 0 syncscope("agent-one-as") release, align 4 @@ -57,15 +57,15 @@ entry: } define i32 @global_agent_acquire_release_idempotent_or(ptr addrspace(1) %in) { -; GFX940-LABEL: global_agent_acquire_release_idempotent_or: -; GFX940: ; %bb.0: ; %entry -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_or v0, v[0:1], v2, off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_acquire_release_idempotent_or: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_or v0, v[0:1], v2, off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; OPT-LABEL: @global_agent_acquire_release_idempotent_or( ; OPT-NEXT: entry: ; OPT-NEXT: [[VAL:%.*]] = atomicrmw or ptr addrspace(1) [[IN:%.*]], i32 0 syncscope("agent-one-as") acq_rel, align 4 @@ -77,15 +77,15 @@ entry: } define i32 @global_agent_acquire_release_idempotent_or__no_fine_grained(ptr addrspace(1) %in) { -; GFX940-LABEL: global_agent_acquire_release_idempotent_or__no_fine_grained: -; GFX940: ; %bb.0: ; %entry -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_or v0, v[0:1], v2, off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_acquire_release_idempotent_or__no_fine_grained: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_or v0, v[0:1], v2, off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; OPT-LABEL: @global_agent_acquire_release_idempotent_or__no_fine_grained( ; OPT-NEXT: entry: ; OPT-NEXT: [[VAL:%.*]] = atomicrmw or ptr addrspace(1) [[IN:%.*]], i32 0 syncscope("agent-one-as") acq_rel, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]] @@ -97,15 +97,15 @@ entry: } define i32 @global_agent_seq_cst_idempotent_or(ptr addrspace(1) %in) { -; GFX940-LABEL: global_agent_seq_cst_idempotent_or: -; GFX940: ; %bb.0: ; %entry -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: global_atomic_or v0, v[0:1], v2, off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_inv sc1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_seq_cst_idempotent_or: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: buffer_wbl2 sc1 +; GFX942-NEXT: global_atomic_or v0, v[0:1], v2, off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_inv sc1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; OPT-LABEL: @global_agent_seq_cst_idempotent_or( ; OPT-NEXT: entry: ; OPT-NEXT: [[VAL:%.*]] = atomicrmw or ptr addrspace(1) [[IN:%.*]], i32 0 syncscope("agent-one-as") seq_cst, align 4 @@ -117,12 +117,12 @@ entry: } define i32 @global_agent_monotonic_idempotent_add(ptr addrspace(1) %in) { -; GFX940-LABEL: global_agent_monotonic_idempotent_add: -; GFX940: ; %bb.0: ; %entry -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v0, v[0:1], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_monotonic_idempotent_add: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v0, v[0:1], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; OPT-LABEL: @global_agent_monotonic_idempotent_add( ; OPT-NEXT: entry: ; OPT-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[IN:%.*]] syncscope("workgroup") monotonic, align 4 @@ -134,12 +134,12 @@ entry: } define i32 @global_agent_monotonic_idempotent_add__no_fine_grained(ptr addrspace(1) %in) { -; GFX940-LABEL: global_agent_monotonic_idempotent_add__no_fine_grained: -; GFX940: ; %bb.0: ; %entry -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v0, v[0:1], off sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_monotonic_idempotent_add__no_fine_grained: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v0, v[0:1], off sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; OPT-LABEL: @global_agent_monotonic_idempotent_add__no_fine_grained( ; OPT-NEXT: entry: ; OPT-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[IN:%.*]] syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META0]] @@ -151,12 +151,12 @@ entry: } define i32 @global_agent_monotonic_idempotent_sub(ptr addrspace(1) %in) { -; GFX940-LABEL: global_agent_monotonic_idempotent_sub: -; GFX940: ; %bb.0: ; %entry -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v0, v[0:1], off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_monotonic_idempotent_sub: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v0, v[0:1], off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; OPT-LABEL: @global_agent_monotonic_idempotent_sub( ; OPT-NEXT: entry: ; OPT-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[IN:%.*]] syncscope("wavefront") monotonic, align 4 @@ -168,12 +168,12 @@ entry: } define i32 @global_agent_monotonic_idempotent_sub__no_fine_grained(ptr addrspace(1) %in) { -; GFX940-LABEL: global_agent_monotonic_idempotent_sub__no_fine_grained: -; GFX940: ; %bb.0: ; %entry -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v0, v[0:1], off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_monotonic_idempotent_sub__no_fine_grained: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v0, v[0:1], off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; OPT-LABEL: @global_agent_monotonic_idempotent_sub__no_fine_grained( ; OPT-NEXT: entry: ; OPT-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[IN:%.*]] syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META0]] @@ -185,12 +185,12 @@ entry: } define i32 @global_system_monotonic_idempotent_xor(ptr addrspace(1) %in) { -; GFX940-LABEL: global_system_monotonic_idempotent_xor: -; GFX940: ; %bb.0: ; %entry -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v0, v[0:1], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_monotonic_idempotent_xor: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v0, v[0:1], off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; OPT-LABEL: @global_system_monotonic_idempotent_xor( ; OPT-NEXT: entry: ; OPT-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[IN:%.*]] monotonic, align 4 @@ -202,12 +202,12 @@ entry: } define i32 @global_system_monotonic_idempotent_xor__no_fine_grained(ptr addrspace(1) %in) { -; GFX940-LABEL: global_system_monotonic_idempotent_xor__no_fine_grained: -; GFX940: ; %bb.0: ; %entry -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v0, v[0:1], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_system_monotonic_idempotent_xor__no_fine_grained: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v0, v[0:1], off sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; OPT-LABEL: @global_system_monotonic_idempotent_xor__no_fine_grained( ; OPT-NEXT: entry: ; OPT-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[IN:%.*]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META0]] @@ -219,12 +219,12 @@ entry: } define i32 @global_agent_monotonic_idempotent_and(ptr addrspace(1) %in) { -; GFX940-LABEL: global_agent_monotonic_idempotent_and: -; GFX940: ; %bb.0: ; %entry -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v0, v[0:1], off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_monotonic_idempotent_and: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v0, v[0:1], off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; OPT-LABEL: @global_agent_monotonic_idempotent_and( ; OPT-NEXT: entry: ; OPT-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[IN:%.*]] syncscope("singlethread") monotonic, align 4 @@ -236,12 +236,12 @@ entry: } define i32 @global_agent_monotonic_idempotent_and_no_fined_grain(ptr addrspace(1) %in) { -; GFX940-LABEL: global_agent_monotonic_idempotent_and_no_fined_grain: -; GFX940: ; %bb.0: ; %entry -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v0, v[0:1], off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: global_agent_monotonic_idempotent_and_no_fined_grain: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v0, v[0:1], off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; OPT-LABEL: @global_agent_monotonic_idempotent_and_no_fined_grain( ; OPT-NEXT: entry: ; OPT-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[IN:%.*]] syncscope("singlethread") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META0]] diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2bf16.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2bf16.ll index d5b6c19399a1f..75db7571444bc 100644 --- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2bf16.ll @@ -2,7 +2,7 @@ ; RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti < %s | FileCheck -check-prefix=SI %s ; RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=tonga < %s | FileCheck -check-prefix=VI %s ; RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX900 %s -; RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s +; RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s define amdgpu_kernel void @s_insertelement_v2bf16_0(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr) #0 { ; SI-LABEL: s_insertelement_v2bf16_0: @@ -46,18 +46,18 @@ define amdgpu_kernel void @s_insertelement_v2bf16_0(ptr addrspace(1) %out, ptr a ; GFX900-NEXT: global_store_dword v0, v1, s[0:1] ; GFX900-NEXT: s_endpgm ; -; GFX940-LABEL: s_insertelement_v2bf16_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_load_dword s2, s[2:3], 0x0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_lshr_b32 s2, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s2, 0x40a0, s2 -; GFX940-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: s_insertelement_v2bf16_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_load_dword s2, s[2:3], 0x0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_lshr_b32 s2, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s2, 0x40a0, s2 +; GFX942-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_endpgm %vec = load <2 x bfloat>, ptr addrspace(4) %vec.ptr %vecins = insertelement <2 x bfloat> %vec, bfloat 5.000000e+00, i32 0 store <2 x bfloat> %vecins, ptr addrspace(1) %out @@ -105,17 +105,17 @@ define amdgpu_kernel void @s_insertelement_v2bf16_1(ptr addrspace(1) %out, ptr a ; GFX900-NEXT: global_store_dword v0, v1, s[0:1] ; GFX900-NEXT: s_endpgm ; -; GFX940-LABEL: s_insertelement_v2bf16_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_load_dword s2, s[2:3], 0x0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_pack_ll_b32_b16 s2, s2, 0x40a0 -; GFX940-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: s_insertelement_v2bf16_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_load_dword s2, s[2:3], 0x0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_pack_ll_b32_b16 s2, s2, 0x40a0 +; GFX942-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_endpgm %vec = load <2 x bfloat>, ptr addrspace(4) %vec.ptr %vecins = insertelement <2 x bfloat> %vec, bfloat 5.000000e+00, i32 1 store <2 x bfloat> %vecins, ptr addrspace(1) %out @@ -171,19 +171,19 @@ define amdgpu_kernel void @v_insertelement_v2bf16_0(ptr addrspace(1) %out, ptr a ; GFX900-NEXT: global_store_dword v0, v1, s[0:1] ; GFX900-NEXT: s_endpgm ; -; GFX940-LABEL: v_insertelement_v2bf16_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GFX940-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, 0x40a0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_bfi_b32 v1, s2, v2, v1 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: v_insertelement_v2bf16_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, 0x40a0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_bfi_b32 v1, s2, v2, v1 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %in.gep = getelementptr inbounds <2 x bfloat>, ptr addrspace(1) %in, i64 %tid.ext @@ -242,18 +242,18 @@ define amdgpu_kernel void @v_insertelement_v2bf16_0_inlineimm(ptr addrspace(1) % ; GFX900-NEXT: global_store_dword v0, v1, s[0:1] ; GFX900-NEXT: s_endpgm ; -; GFX940-LABEL: v_insertelement_v2bf16_0_inlineimm: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GFX940-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_bfi_b32 v1, s2, 53, v1 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: v_insertelement_v2bf16_0_inlineimm: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_bfi_b32 v1, s2, 53, v1 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %in.gep = getelementptr inbounds <2 x bfloat>, ptr addrspace(1) %in, i64 %tid.ext @@ -313,19 +313,19 @@ define amdgpu_kernel void @v_insertelement_v2bf16_1(ptr addrspace(1) %out, ptr a ; GFX900-NEXT: global_store_dword v0, v1, s[0:1] ; GFX900-NEXT: s_endpgm ; -; GFX940-LABEL: v_insertelement_v2bf16_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GFX940-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, 0x5040100 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-NEXT: s_movk_i32 s2, 0x40a0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_perm_b32 v1, s2, v1, v2 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: v_insertelement_v2bf16_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, 0x5040100 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-NEXT: s_movk_i32 s2, 0x40a0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_perm_b32 v1, s2, v1, v2 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %in.gep = getelementptr inbounds <2 x bfloat>, ptr addrspace(1) %in, i64 %tid.ext @@ -384,18 +384,18 @@ define amdgpu_kernel void @v_insertelement_v2bf16_1_inlineimm(ptr addrspace(1) % ; GFX900-NEXT: global_store_dword v0, v1, s[0:1] ; GFX900-NEXT: s_endpgm ; -; GFX940-LABEL: v_insertelement_v2bf16_1_inlineimm: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GFX940-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, 0x5040100 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_perm_b32 v1, 35, v1, v2 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: v_insertelement_v2bf16_1_inlineimm: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, 0x5040100 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_perm_b32 v1, 35, v1, v2 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %in.gep = getelementptr inbounds <2 x bfloat>, ptr addrspace(1) %in, i64 %tid.ext @@ -475,24 +475,24 @@ define amdgpu_kernel void @v_insertelement_v2bf16_dynamic_vgpr(ptr addrspace(1) ; GFX900-NEXT: global_store_dword v0, v1, s[0:1] ; GFX900-NEXT: s_endpgm ; -; GFX940-LABEL: v_insertelement_v2bf16_dynamic_vgpr: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10 -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GFX940-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: global_load_dword v1, v0, s[6:7] -; GFX940-NEXT: global_load_dword v2, v0, s[2:3] -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: s_waitcnt vmcnt(1) -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 4, v1 -; GFX940-NEXT: v_lshlrev_b32_e64 v1, v1, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x12341234 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_bfi_b32 v1, v1, s2, v2 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: v_insertelement_v2bf16_dynamic_vgpr: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10 +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: global_load_dword v1, v0, s[6:7] +; GFX942-NEXT: global_load_dword v2, v0, s[2:3] +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: s_waitcnt vmcnt(1) +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 4, v1 +; GFX942-NEXT: v_lshlrev_b32_e64 v1, v1, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x12341234 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_bfi_b32 v1, v1, s2, v2 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %in.gep = getelementptr inbounds <2 x bfloat>, ptr addrspace(1) %in, i64 %tid.ext @@ -559,20 +559,20 @@ define amdgpu_kernel void @v_insertelement_v4bf16_0(ptr addrspace(1) %out, ptr a ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX900-NEXT: s_endpgm ; -; GFX940-LABEL: v_insertelement_v4bf16_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GFX940-NEXT: s_load_dword s6, s[4:5], 0x30 -; GFX940-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 3, v0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[0:1], v2, s[2:3] -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v3, s6 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: v_insertelement_v4bf16_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX942-NEXT: s_load_dword s6, s[4:5], 0x30 +; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 3, v0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx2 v[0:1], v2, s[2:3] +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v3, s6 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %in.gep = getelementptr inbounds <4 x bfloat>, ptr addrspace(1) %in, i64 %tid.ext @@ -638,19 +638,19 @@ define amdgpu_kernel void @v_insertelement_v4bf16_1(ptr addrspace(1) %out, ptr a ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX900-NEXT: s_endpgm ; -; GFX940-LABEL: v_insertelement_v4bf16_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GFX940-NEXT: s_load_dword s6, s[4:5], 0x10 -; GFX940-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 3, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, 0x5040100 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[0:1], v2, s[2:3] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_perm_b32 v0, s6, v0, v3 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: v_insertelement_v4bf16_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX942-NEXT: s_load_dword s6, s[4:5], 0x10 +; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 3, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, 0x5040100 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx2 v[0:1], v2, s[2:3] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_perm_b32 v0, s6, v0, v3 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %in.gep = getelementptr inbounds <4 x bfloat>, ptr addrspace(1) %in, i64 %tid.ext @@ -717,20 +717,20 @@ define amdgpu_kernel void @v_insertelement_v4bf16_2(ptr addrspace(1) %out, ptr a ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX900-NEXT: s_endpgm ; -; GFX940-LABEL: v_insertelement_v4bf16_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GFX940-NEXT: s_load_dword s6, s[4:5], 0x30 -; GFX940-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 3, v0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[0:1], v2, s[2:3] -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v3, s6 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: v_insertelement_v4bf16_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX942-NEXT: s_load_dword s6, s[4:5], 0x30 +; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 3, v0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx2 v[0:1], v2, s[2:3] +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v3, s6 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_bfi_b32 v1, s2, v3, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %in.gep = getelementptr inbounds <4 x bfloat>, ptr addrspace(1) %in, i64 %tid.ext @@ -796,19 +796,19 @@ define amdgpu_kernel void @v_insertelement_v4bf16_3(ptr addrspace(1) %out, ptr a ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX900-NEXT: s_endpgm ; -; GFX940-LABEL: v_insertelement_v4bf16_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GFX940-NEXT: s_load_dword s6, s[4:5], 0x10 -; GFX940-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 3, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, 0x5040100 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[0:1], v2, s[2:3] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_perm_b32 v1, s6, v1, v3 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: v_insertelement_v4bf16_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX942-NEXT: s_load_dword s6, s[4:5], 0x10 +; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 3, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, 0x5040100 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx2 v[0:1], v2, s[2:3] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_perm_b32 v1, s6, v1, v3 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %in.gep = getelementptr inbounds <4 x bfloat>, ptr addrspace(1) %in, i64 %tid.ext @@ -892,24 +892,24 @@ define amdgpu_kernel void @v_insertelement_v4bf16_dynamic_sgpr(ptr addrspace(1) ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX900-NEXT: s_endpgm ; -; GFX940-LABEL: v_insertelement_v4bf16_dynamic_sgpr: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10 -; GFX940-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 3, v0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[0:1], v2, s[2:3] -; GFX940-NEXT: s_lshl_b32 s2, s7, 4 -; GFX940-NEXT: s_pack_ll_b32_b16 s4, s6, s6 -; GFX940-NEXT: s_lshl_b64 s[2:3], 0xffff, s2 -; GFX940-NEXT: v_mov_b32_e32 v3, s4 -; GFX940-NEXT: v_mov_b32_e32 v4, s4 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_bfi_b32 v1, s3, v3, v1 -; GFX940-NEXT: v_bfi_b32 v0, s2, v4, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: v_insertelement_v4bf16_dynamic_sgpr: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10 +; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 3, v0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx2 v[0:1], v2, s[2:3] +; GFX942-NEXT: s_lshl_b32 s2, s7, 4 +; GFX942-NEXT: s_pack_ll_b32_b16 s4, s6, s6 +; GFX942-NEXT: s_lshl_b64 s[2:3], 0xffff, s2 +; GFX942-NEXT: v_mov_b32_e32 v3, s4 +; GFX942-NEXT: v_mov_b32_e32 v4, s4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_bfi_b32 v1, s3, v3, v1 +; GFX942-NEXT: v_bfi_b32 v0, s2, v4, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %in.gep = getelementptr inbounds <4 x bfloat>, ptr addrspace(1) %in, i64 %tid.ext @@ -976,19 +976,19 @@ define amdgpu_kernel void @v_insertelement_v8bf16_3(ptr addrspace(1) %out, ptr a ; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX900-NEXT: s_endpgm ; -; GFX940-LABEL: v_insertelement_v8bf16_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GFX940-NEXT: s_load_dword s6, s[4:5], 0x10 -; GFX940-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, 0x5040100 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_perm_b32 v1, s6, v1, v5 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: v_insertelement_v8bf16_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX942-NEXT: s_load_dword s6, s[4:5], 0x10 +; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, 0x5040100 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_perm_b32 v1, s6, v1, v5 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %in.gep = getelementptr inbounds <8 x bfloat>, ptr addrspace(1) %in, i64 %tid.ext @@ -1159,51 +1159,51 @@ define amdgpu_kernel void @v_insertelement_v8bf16_dynamic(ptr addrspace(1) %out, ; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX900-NEXT: s_endpgm ; -; GFX940-LABEL: v_insertelement_v8bf16_dynamic: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10 -; GFX940-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 4, v0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] -; GFX940-NEXT: s_cmp_eq_u32 s7, 6 -; GFX940-NEXT: v_mov_b32_e32 v5, s6 -; GFX940-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX940-NEXT: s_cmp_eq_u32 s7, 7 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_cndmask_b32_e32 v6, v3, v5, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3 -; GFX940-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX940-NEXT: s_cmp_eq_u32 s7, 4 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; GFX940-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX940-NEXT: s_cmp_eq_u32 s7, 5 -; GFX940-NEXT: v_lshrrev_b32_e32 v7, 16, v2 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc -; GFX940-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX940-NEXT: s_cmp_eq_u32 s7, 2 -; GFX940-NEXT: v_perm_b32 v3, v3, v6, s2 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v7, v5, vcc -; GFX940-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX940-NEXT: s_cmp_eq_u32 s7, 3 -; GFX940-NEXT: v_lshrrev_b32_e32 v8, 16, v1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc -; GFX940-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX940-NEXT: s_cmp_eq_u32 s7, 0 -; GFX940-NEXT: v_perm_b32 v2, v6, v2, s2 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v8, v5, vcc -; GFX940-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX940-NEXT: s_cmp_eq_u32 s7, 1 -; GFX940-NEXT: v_lshrrev_b32_e32 v9, 16, v0 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc -; GFX940-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v9, v5, vcc -; GFX940-NEXT: v_perm_b32 v1, v6, v1, s2 -; GFX940-NEXT: v_perm_b32 v0, v5, v0, s2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: v_insertelement_v8bf16_dynamic: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10 +; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 4, v0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] +; GFX942-NEXT: s_cmp_eq_u32 s7, 6 +; GFX942-NEXT: v_mov_b32_e32 v5, s6 +; GFX942-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX942-NEXT: s_cmp_eq_u32 s7, 7 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_cndmask_b32_e32 v6, v3, v5, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX942-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX942-NEXT: s_cmp_eq_u32 s7, 4 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; GFX942-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX942-NEXT: s_cmp_eq_u32 s7, 5 +; GFX942-NEXT: v_lshrrev_b32_e32 v7, 16, v2 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc +; GFX942-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX942-NEXT: s_cmp_eq_u32 s7, 2 +; GFX942-NEXT: v_perm_b32 v3, v3, v6, s2 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v7, v5, vcc +; GFX942-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX942-NEXT: s_cmp_eq_u32 s7, 3 +; GFX942-NEXT: v_lshrrev_b32_e32 v8, 16, v1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc +; GFX942-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX942-NEXT: s_cmp_eq_u32 s7, 0 +; GFX942-NEXT: v_perm_b32 v2, v6, v2, s2 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v8, v5, vcc +; GFX942-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX942-NEXT: s_cmp_eq_u32 s7, 1 +; GFX942-NEXT: v_lshrrev_b32_e32 v9, 16, v0 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc +; GFX942-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v9, v5, vcc +; GFX942-NEXT: v_perm_b32 v1, v6, v1, s2 +; GFX942-NEXT: v_perm_b32 v0, v5, v0, s2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %in.gep = getelementptr inbounds <8 x bfloat>, ptr addrspace(1) %in, i64 %tid.ext @@ -1283,22 +1283,22 @@ define amdgpu_kernel void @v_insertelement_v16bf16_3(ptr addrspace(1) %out, ptr ; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX900-NEXT: s_endpgm ; -; GFX940-LABEL: v_insertelement_v16bf16_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GFX940-NEXT: s_load_dword s6, s[4:5], 0x10 -; GFX940-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v8, 5, v0 -; GFX940-NEXT: v_mov_b32_e32 v9, 0x5040100 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx4 v[0:3], v8, s[2:3] -; GFX940-NEXT: global_load_dwordx4 v[4:7], v8, s[2:3] offset:16 -; GFX940-NEXT: s_waitcnt vmcnt(1) -; GFX940-NEXT: v_perm_b32 v1, s6, v1, v9 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: v_insertelement_v16bf16_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX942-NEXT: s_load_dword s6, s[4:5], 0x10 +; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v8, 5, v0 +; GFX942-NEXT: v_mov_b32_e32 v9, 0x5040100 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx4 v[0:3], v8, s[2:3] +; GFX942-NEXT: global_load_dwordx4 v[4:7], v8, s[2:3] offset:16 +; GFX942-NEXT: s_waitcnt vmcnt(1) +; GFX942-NEXT: v_perm_b32 v1, s6, v1, v9 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %in.gep = getelementptr inbounds <16 x bfloat>, ptr addrspace(1) %in, i64 %tid.ext @@ -1589,86 +1589,86 @@ define amdgpu_kernel void @v_insertelement_v16bf16_dynamic(ptr addrspace(1) %out ; GFX900-NEXT: global_store_dwordx4 v0, v[1:4], s[0:1] ; GFX900-NEXT: s_endpgm ; -; GFX940-LABEL: v_insertelement_v16bf16_dynamic: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10 -; GFX940-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v8, 5, v0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx4 v[0:3], v8, s[2:3] -; GFX940-NEXT: global_load_dwordx4 v[4:7], v8, s[2:3] offset:16 -; GFX940-NEXT: s_cmp_eq_u32 s7, 6 -; GFX940-NEXT: v_mov_b32_e32 v9, s6 -; GFX940-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX940-NEXT: s_cmp_eq_u32 s7, 7 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: s_waitcnt vmcnt(1) -; GFX940-NEXT: v_cndmask_b32_e32 v10, v3, v9, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v3 -; GFX940-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX940-NEXT: s_cmp_eq_u32 s7, 4 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc -; GFX940-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX940-NEXT: s_cmp_eq_u32 s7, 5 -; GFX940-NEXT: v_lshrrev_b32_e32 v11, 16, v2 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc -; GFX940-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX940-NEXT: s_cmp_eq_u32 s7, 2 -; GFX940-NEXT: v_perm_b32 v3, v3, v10, s2 -; GFX940-NEXT: v_cndmask_b32_e32 v10, v11, v9, vcc -; GFX940-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX940-NEXT: s_cmp_eq_u32 s7, 3 -; GFX940-NEXT: v_lshrrev_b32_e32 v12, 16, v1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc -; GFX940-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX940-NEXT: s_cmp_eq_u32 s7, 0 -; GFX940-NEXT: v_perm_b32 v2, v10, v2, s2 -; GFX940-NEXT: v_cndmask_b32_e32 v10, v12, v9, vcc -; GFX940-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX940-NEXT: s_cmp_eq_u32 s7, 1 -; GFX940-NEXT: v_lshrrev_b32_e32 v13, 16, v0 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc -; GFX940-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX940-NEXT: s_cmp_eq_u32 s7, 14 -; GFX940-NEXT: v_perm_b32 v1, v10, v1, s2 -; GFX940-NEXT: v_cndmask_b32_e32 v10, v13, v9, vcc -; GFX940-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX940-NEXT: s_cmp_eq_u32 s7, 15 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v14, 16, v7 -; GFX940-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc -; GFX940-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX940-NEXT: s_cmp_eq_u32 s7, 12 -; GFX940-NEXT: v_perm_b32 v0, v10, v0, s2 -; GFX940-NEXT: v_cndmask_b32_e32 v10, v14, v9, vcc -; GFX940-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX940-NEXT: s_cmp_eq_u32 s7, 13 -; GFX940-NEXT: v_lshrrev_b32_e32 v15, 16, v6 -; GFX940-NEXT: v_cndmask_b32_e32 v6, v6, v9, vcc -; GFX940-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX940-NEXT: s_cmp_eq_u32 s7, 10 -; GFX940-NEXT: v_perm_b32 v7, v10, v7, s2 -; GFX940-NEXT: v_cndmask_b32_e32 v10, v15, v9, vcc -; GFX940-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX940-NEXT: s_cmp_eq_u32 s7, 11 -; GFX940-NEXT: v_lshrrev_b32_e32 v16, 16, v5 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc -; GFX940-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX940-NEXT: s_cmp_eq_u32 s7, 8 -; GFX940-NEXT: v_perm_b32 v6, v10, v6, s2 -; GFX940-NEXT: v_cndmask_b32_e32 v10, v16, v9, vcc -; GFX940-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX940-NEXT: s_cmp_eq_u32 s7, 9 -; GFX940-NEXT: v_lshrrev_b32_e32 v17, 16, v4 -; GFX940-NEXT: v_cndmask_b32_e32 v4, v4, v9, vcc -; GFX940-NEXT: s_cselect_b64 vcc, -1, 0 -; GFX940-NEXT: v_cndmask_b32_e32 v9, v17, v9, vcc -; GFX940-NEXT: v_perm_b32 v5, v10, v5, s2 -; GFX940-NEXT: v_perm_b32 v4, v9, v4, s2 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: v_insertelement_v16bf16_dynamic: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10 +; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v8, 5, v0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx4 v[0:3], v8, s[2:3] +; GFX942-NEXT: global_load_dwordx4 v[4:7], v8, s[2:3] offset:16 +; GFX942-NEXT: s_cmp_eq_u32 s7, 6 +; GFX942-NEXT: v_mov_b32_e32 v9, s6 +; GFX942-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX942-NEXT: s_cmp_eq_u32 s7, 7 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: s_waitcnt vmcnt(1) +; GFX942-NEXT: v_cndmask_b32_e32 v10, v3, v9, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX942-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX942-NEXT: s_cmp_eq_u32 s7, 4 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc +; GFX942-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX942-NEXT: s_cmp_eq_u32 s7, 5 +; GFX942-NEXT: v_lshrrev_b32_e32 v11, 16, v2 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc +; GFX942-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX942-NEXT: s_cmp_eq_u32 s7, 2 +; GFX942-NEXT: v_perm_b32 v3, v3, v10, s2 +; GFX942-NEXT: v_cndmask_b32_e32 v10, v11, v9, vcc +; GFX942-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX942-NEXT: s_cmp_eq_u32 s7, 3 +; GFX942-NEXT: v_lshrrev_b32_e32 v12, 16, v1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc +; GFX942-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX942-NEXT: s_cmp_eq_u32 s7, 0 +; GFX942-NEXT: v_perm_b32 v2, v10, v2, s2 +; GFX942-NEXT: v_cndmask_b32_e32 v10, v12, v9, vcc +; GFX942-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX942-NEXT: s_cmp_eq_u32 s7, 1 +; GFX942-NEXT: v_lshrrev_b32_e32 v13, 16, v0 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc +; GFX942-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX942-NEXT: s_cmp_eq_u32 s7, 14 +; GFX942-NEXT: v_perm_b32 v1, v10, v1, s2 +; GFX942-NEXT: v_cndmask_b32_e32 v10, v13, v9, vcc +; GFX942-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX942-NEXT: s_cmp_eq_u32 s7, 15 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v14, 16, v7 +; GFX942-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc +; GFX942-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX942-NEXT: s_cmp_eq_u32 s7, 12 +; GFX942-NEXT: v_perm_b32 v0, v10, v0, s2 +; GFX942-NEXT: v_cndmask_b32_e32 v10, v14, v9, vcc +; GFX942-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX942-NEXT: s_cmp_eq_u32 s7, 13 +; GFX942-NEXT: v_lshrrev_b32_e32 v15, 16, v6 +; GFX942-NEXT: v_cndmask_b32_e32 v6, v6, v9, vcc +; GFX942-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX942-NEXT: s_cmp_eq_u32 s7, 10 +; GFX942-NEXT: v_perm_b32 v7, v10, v7, s2 +; GFX942-NEXT: v_cndmask_b32_e32 v10, v15, v9, vcc +; GFX942-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX942-NEXT: s_cmp_eq_u32 s7, 11 +; GFX942-NEXT: v_lshrrev_b32_e32 v16, 16, v5 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc +; GFX942-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX942-NEXT: s_cmp_eq_u32 s7, 8 +; GFX942-NEXT: v_perm_b32 v6, v10, v6, s2 +; GFX942-NEXT: v_cndmask_b32_e32 v10, v16, v9, vcc +; GFX942-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX942-NEXT: s_cmp_eq_u32 s7, 9 +; GFX942-NEXT: v_lshrrev_b32_e32 v17, 16, v4 +; GFX942-NEXT: v_cndmask_b32_e32 v4, v4, v9, vcc +; GFX942-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX942-NEXT: v_cndmask_b32_e32 v9, v17, v9, vcc +; GFX942-NEXT: v_perm_b32 v5, v10, v5, s2 +; GFX942-NEXT: v_perm_b32 v4, v9, v4, s2 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %in.gep = getelementptr inbounds <16 x bfloat>, ptr addrspace(1) %in, i64 %tid.ext diff --git a/llvm/test/CodeGen/AMDGPU/lds-dma-hazards.mir b/llvm/test/CodeGen/AMDGPU/lds-dma-hazards.mir index 7d296ec20370d..4fe0ec45048ce 100644 --- a/llvm/test/CodeGen/AMDGPU/lds-dma-hazards.mir +++ b/llvm/test/CodeGen/AMDGPU/lds-dma-hazards.mir @@ -1,4 +1,4 @@ -# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck --check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck --check-prefix=GCN %s # GCN-LABEL: name: buffer_load_dword_lds # GCN: $m0 = S_MOV_B32 0 diff --git a/llvm/test/CodeGen/AMDGPU/lds-dma-waitcnt.mir b/llvm/test/CodeGen/AMDGPU/lds-dma-waitcnt.mir index 3dccf49619738..f10b5378a7462 100644 --- a/llvm/test/CodeGen/AMDGPU/lds-dma-waitcnt.mir +++ b/llvm/test/CodeGen/AMDGPU/lds-dma-waitcnt.mir @@ -1,4 +1,4 @@ -# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GCN %s # GCN-LABEL: name: buffer_load_dword_lds_ds_read # GCN: BUFFER_LOAD_DWORD_LDS_IDXEN diff --git a/llvm/test/CodeGen/AMDGPU/lds-limit-diagnostics.ll b/llvm/test/CodeGen/AMDGPU/lds-limit-diagnostics.ll index 73f6dcb3a2a1d..902bec322aa1b 100644 --- a/llvm/test/CodeGen/AMDGPU/lds-limit-diagnostics.ll +++ b/llvm/test/CodeGen/AMDGPU/lds-limit-diagnostics.ll @@ -1,8 +1,6 @@ ; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERROR-LIMIT160K %s ; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-4-generic -filetype=null %s 2>&1 | FileCheck -check-prefix=ERROR-LIMIT64K %s ; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-generic -filetype=null %s 2>&1 | FileCheck -check-prefix=ERROR-LIMIT64K %s -; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERROR-LIMIT64K %s -; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx941 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERROR-LIMIT64K %s ; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERROR-LIMIT64K %s ; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERROR-LIMIT64K %s ; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERROR-LIMIT64K %s diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll index 3f418ee80f877..1a46e6f6afcd7 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9X,GFX940 %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9X,GFX940 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9X,GFX942 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9X,GFX942 %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9X,GFX950 %s ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9X,GFX950 %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s @@ -16,11 +16,11 @@ declare i32 @llvm.amdgcn.cvt.sr.bf8.f32(float, i32, i32, i32) declare i32 @llvm.amdgcn.cvt.sr.fp8.f32(float, i32, i32, i32) define float @test_cvt_f32_bf8_byte0(i32 %a) { -; GFX940-LABEL: test_cvt_f32_bf8_byte0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: test_cvt_f32_bf8_byte0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: test_cvt_f32_bf8_byte0: ; GFX950: ; %bb.0: @@ -102,11 +102,11 @@ define float @test_cvt_f32_bf8_byte3(i32 %a) { } define float @test_cvt_f32_fp8_byte0(i32 %a) { -; GFX940-LABEL: test_cvt_f32_fp8_byte0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: test_cvt_f32_fp8_byte0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: test_cvt_f32_fp8_byte0: ; GFX950: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll index 26e6bde97f499..4d31e30886e55 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX9,GFX906 -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX9,GFX940 +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX9,GFX942 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10 @@ -27,7 +27,7 @@ entry: ; GCN-LABEL: {{^}}test_llvm_amdgcn_fdot2_no_clamp ; GFX906: v_dot2_f32_f16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}} -; GFX940: v_dot2c_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}} +; GFX942: v_dot2c_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}} ; GFX10: {{v_dot2c_f32_f16|v_dot2acc_f32_f16}} v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}} ; GFX12: v_dot2_f32_f16 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}} define amdgpu_kernel void @test_llvm_amdgcn_fdot2_no_clamp( @@ -46,7 +46,7 @@ entry: ; GFX9-LABEL: {{^}}fdot2_inline_literal ; GFX906: v_dot2_f32_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 1.0 -; GFX940: v_dot2c_f32_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}} +; GFX942: v_dot2c_f32_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}} ; GFX12: v_dot2_f32_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 1.0{{$}} define float @fdot2_inline_literal(<2 x half> %a, <2 x half> %b) { %ret = tail call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float 1.0, i1 false) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.lds.gfx950.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.lds.gfx950.ll index 8f67375a09cb7..86fb048bad775 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.lds.gfx950.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.lds.gfx950.ll @@ -2,8 +2,8 @@ ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-SDAG %s ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-GISEL %s -; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx940 -filetype=null < %s 2>&1 | FileCheck -check-prefix=ERR-SDAG %s -; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx940 -filetype=null < %s 2>&1 | FileCheck -check-prefix=ERR-GISEL %s +; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx942 -filetype=null < %s 2>&1 | FileCheck -check-prefix=ERR-SDAG %s +; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx942 -filetype=null < %s 2>&1 | FileCheck -check-prefix=ERR-GISEL %s ; ERR-SDAG: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.global.load.lds diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.lds.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.lds.ll index 7362baf6bab95..daece9e776414 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.lds.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.lds.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX900 ; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX90A -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX940 +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX942 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX10 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX900-GISEL @@ -24,13 +24,13 @@ define amdgpu_ps void @global_load_lds_dword_vaddr(ptr addrspace(1) nocapture %g ; GFX90A-NEXT: global_load_dword v[0:1], off offset:16 glc lds ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: global_load_lds_dword_vaddr: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: v_readfirstlane_b32 s0, v2 -; GFX940-NEXT: s_mov_b32 m0, s0 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: global_load_lds_dword v[0:1], off offset:16 sc0 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: global_load_lds_dword_vaddr: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: v_readfirstlane_b32 s0, v2 +; GFX942-NEXT: s_mov_b32 m0, s0 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: global_load_lds_dword v[0:1], off offset:16 sc0 +; GFX942-NEXT: s_endpgm ; ; GFX10-LABEL: global_load_lds_dword_vaddr: ; GFX10: ; %bb.0: ; %main_body @@ -69,14 +69,14 @@ define amdgpu_ps void @global_load_lds_dword_saddr(ptr addrspace(1) nocapture in ; GFX90A-NEXT: global_load_dword v1, s[0:1] offset:32 slc lds ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: global_load_lds_dword_saddr: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: v_readfirstlane_b32 s2, v0 -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: s_mov_b32 m0, s2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: global_load_lds_dword v1, s[0:1] offset:32 nt -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: global_load_lds_dword_saddr: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: v_readfirstlane_b32 s2, v0 +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: s_mov_b32 m0, s2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: global_load_lds_dword v1, s[0:1] offset:32 nt +; GFX942-NEXT: s_endpgm ; ; GFX10-LABEL: global_load_lds_dword_saddr: ; GFX10: ; %bb.0: ; %main_body @@ -115,13 +115,13 @@ define amdgpu_ps void @global_load_lds_dword_saddr_and_vaddr(ptr addrspace(1) no ; GFX90A-NEXT: global_load_dword v1, s[0:1] offset:48 scc lds ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: global_load_lds_dword_saddr_and_vaddr: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: v_readfirstlane_b32 s2, v0 -; GFX940-NEXT: s_mov_b32 m0, s2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: global_load_lds_dword v1, s[0:1] offset:48 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: global_load_lds_dword_saddr_and_vaddr: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: v_readfirstlane_b32 s2, v0 +; GFX942-NEXT: s_mov_b32 m0, s2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: global_load_lds_dword v1, s[0:1] offset:48 sc1 +; GFX942-NEXT: s_endpgm ; ; GFX10-LABEL: global_load_lds_dword_saddr_and_vaddr: ; GFX10: ; %bb.0: ; %main_body @@ -160,13 +160,13 @@ define amdgpu_ps void @global_load_lds_ushort_vaddr(ptr addrspace(1) nocapture % ; GFX90A-NEXT: global_load_ushort v[0:1], off lds ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: global_load_lds_ushort_vaddr: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: v_readfirstlane_b32 s0, v2 -; GFX940-NEXT: s_mov_b32 m0, s0 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: global_load_lds_ushort v[0:1], off -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: global_load_lds_ushort_vaddr: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: v_readfirstlane_b32 s0, v2 +; GFX942-NEXT: s_mov_b32 m0, s0 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: global_load_lds_ushort v[0:1], off +; GFX942-NEXT: s_endpgm ; ; GFX10-LABEL: global_load_lds_ushort_vaddr: ; GFX10: ; %bb.0: ; %main_body @@ -203,13 +203,13 @@ define amdgpu_ps void @global_load_lds_ubyte_vaddr(ptr addrspace(1) nocapture %g ; GFX90A-NEXT: global_load_ubyte v[0:1], off lds ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: global_load_lds_ubyte_vaddr: -; GFX940: ; %bb.0: ; %main_body -; GFX940-NEXT: v_readfirstlane_b32 s0, v2 -; GFX940-NEXT: s_mov_b32 m0, s0 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: global_load_lds_ubyte v[0:1], off -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: global_load_lds_ubyte_vaddr: +; GFX942: ; %bb.0: ; %main_body +; GFX942-NEXT: v_readfirstlane_b32 s0, v2 +; GFX942-NEXT: s_mov_b32 m0, s0 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: global_load_lds_ubyte v[0:1], off +; GFX942-NEXT: s_endpgm ; ; GFX10-LABEL: global_load_lds_ubyte_vaddr: ; GFX10: ; %bb.0: ; %main_body diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll index 8a460154e4789..352e5eecd7bfe 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX90A %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX940 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX942 %s declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x4bf16.1k(<4 x i16>, <4 x i16>, <32 x float>, i32, i32, i32) declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x4bf16.1k(<4 x i16>, <4 x i16>, <16 x float>, i32, i32, i32) @@ -17,7 +17,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() ; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 ; GCN-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX90A: v_mfma_f32_32x32x4bf16_1k a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 -; GFX940: v_mfma_f32_32x32x4_2b_bf16 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9+]}}], v[[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX942: v_mfma_f32_32x32x4_2b_bf16 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9+]}}], v[[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN-COUNT-8: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] define amdgpu_kernel void @test_mfma_f32_32x32x4bf16_1k(ptr addrspace(1) %arg) #0 { @@ -36,7 +36,7 @@ bb: ; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 ; GCN-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX90A: v_mfma_f32_16x16x4bf16_1k a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 -; GFX940: v_mfma_f32_16x16x4_4b_bf16 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9+]}}], v[[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX942: v_mfma_f32_16x16x4_4b_bf16 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9+]}}], v[[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] define amdgpu_kernel void @test_mfma_f32_16x16x4bf16_1k(ptr addrspace(1) %arg) #0 { @@ -55,7 +55,7 @@ bb: ; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 ; GCN-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX90A: v_mfma_f32_4x4x4bf16_1k [[RES:a\[[0-9]+:[0-9]+\]]], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 -; GFX940: v_mfma_f32_4x4x4_16b_bf16 [[RES:a\[[0-9]+:[0-9]+\]]], v[[[ONE]]:{{[0-9+]}}], v[[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX942: v_mfma_f32_4x4x4_16b_bf16 [[RES:a\[[0-9]+:[0-9]+\]]], v[[[ONE]]:{{[0-9+]}}], v[[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN: global_store_dwordx4 v{{[0-9]+}}, [[RES]], define amdgpu_kernel void @test_mfma_f32_4x4x4bf16_1k(ptr addrspace(1) %arg) #0 { @@ -74,7 +74,7 @@ bb: ; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 ; GCN-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX90A: v_mfma_f32_32x32x8bf16_1k a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 -; GFX940: v_mfma_f32_32x32x8_bf16 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9+]}}], v[[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX942: v_mfma_f32_32x32x8_bf16 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9+]}}], v[[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] define amdgpu_kernel void @test_mfma_f32_32x32x8bf16_1k(ptr addrspace(1) %arg) #0 { @@ -93,7 +93,7 @@ bb: ; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 ; GCN-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX90A: v_mfma_f32_16x16x16bf16_1k [[RES:a\[[0-9]+:[0-9]+\]]], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 -; GFX940: v_mfma_f32_16x16x16_bf16 [[RES:a\[[0-9]+:[0-9]+\]]], v[[[ONE]]:{{[0-9+]}}], v[[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX942: v_mfma_f32_16x16x16_bf16 [[RES:a\[[0-9]+:[0-9]+\]]], v[[[ONE]]:{{[0-9+]}}], v[[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN: global_store_dwordx4 v{{[0-9]+}}, [[RES]], define amdgpu_kernel void @test_mfma_f32_16x16x16bf16_1k(ptr addrspace(1) %arg) #0 { @@ -109,8 +109,8 @@ bb: ; GCN-LABEL: {{^}}test_mfma_f64_4x4x4f64: ; GFX90A: v_mfma_f64_4x4x4f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], 0{{$}} ; GFX90A: v_mfma_f64_4x4x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 blgp:3 -; GFX940: v_mfma_f64_4x4x4_4b_f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], 0{{$}} -; GFX940: v_mfma_f64_4x4x4_4b_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 neg:[1,1,0] +; GFX942: v_mfma_f64_4x4x4_4b_f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], 0{{$}} +; GFX942: v_mfma_f64_4x4x4_4b_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 neg:[1,1,0] ; GCN: global_store_dwordx2 define amdgpu_kernel void @test_mfma_f64_4x4x4f64(ptr addrspace(1) %arg, double %a, double %b) #0 { bb: @@ -123,7 +123,7 @@ bb: ; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64: ; GCN: s_load_dwordx8 ; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 -; GFX940: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 neg:[1,1,0] +; GFX942: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 neg:[1,1,0] ; GCN: global_store_dwordx4 ; GCN: global_store_dwordx4 define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, double %a, double %b) #0 { @@ -137,8 +137,8 @@ bb: ; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64_splat_imm: ; GFX90A: v_mfma_f64_16x16x4f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], 0{{$}} ; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 blgp:3 -; GFX940: v_mfma_f64_16x16x4_f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], 0{{$}} -; GFX940: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 neg:[1,1,0] +; GFX942: v_mfma_f64_16x16x4_f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], 0{{$}} +; GFX942: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 neg:[1,1,0] ; GCN: global_store_dwordx4 ; GCN: global_store_dwordx4 define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm(ptr addrspace(1) %arg, double %a, double %b) #0 { @@ -151,7 +151,7 @@ bb: ; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64_imm: ; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}]{{$}} -; GFX940: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}]{{$}} +; GFX942: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}]{{$}} ; GCN: global_store_dwordx4 ; GCN: global_store_dwordx4 define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(ptr addrspace(1) %arg, double %a, double %b) #0 { @@ -165,7 +165,7 @@ bb: ; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x405ec000 ; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}]{{$}} -; GFX940: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}]{{$}} +; GFX942: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}]{{$}} ; GCN: global_store_dwordx4 ; GCN: global_store_dwordx4 define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(ptr addrspace(1) %arg, double %a, double %b) #0 { diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx940.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx942.ll similarity index 91% rename from llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx940.ll rename to llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx942.ll index da191e4aa419d..ec4e1cbdf5792 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx940.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx942.ll @@ -1,11 +1,11 @@ -; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX940,VGPRCD %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX942,VGPRCD %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -global-isel -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GISEL,VGPRCD %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stress-regalloc=10 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX940,AGPRCD %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stress-regalloc=10 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX942,AGPRCD %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stress-regalloc=10 -global-isel -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GISEL,AGPRCD %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX940,VGPRCD %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX942,VGPRCD %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx950 -global-isel -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GISEL,VGPRCD %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx950 -stress-regalloc=10 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX940,AGPRCD %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx950 -stress-regalloc=10 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX942,AGPRCD %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx950 -stress-regalloc=10 -global-isel -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GISEL,AGPRCD %s declare <4 x i32> @llvm.amdgcn.mfma.i32.16x16x32.i8(i64, i64, <4 x i32>, i32, i32, i32) @@ -34,12 +34,12 @@ declare <16 x float> @llvm.amdgcn.smfmac.f32.32x32x32.fp8.bf8(<2 x i32>, <4 x i3 declare <16 x float> @llvm.amdgcn.smfmac.f32.32x32x32.fp8.fp8(<2 x i32>, <4 x i32>, <16 x float>, i32, i32, i32) ; GCN-LABEL: {{^}}test_mfma_i32_16x16x32i8: -; GFX940-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 -; GFX940-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2 -; GFX940-DAG: v_mov_b32_e32 v[[THREE:[0-9]+]], 3 -; GFX940-DAG: v_mov_b32_e32 v[[FOUR:[0-9]+]], 4 +; GFX942-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 +; GFX942-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2 +; GFX942-DAG: v_mov_b32_e32 v[[THREE:[0-9]+]], 3 +; GFX942-DAG: v_mov_b32_e32 v[[FOUR:[0-9]+]], 4 ; GCN-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} -; GFX940: v_mfma_i32_16x16x32_i8 a[{{[0-9]+:[0-9]+}}], v[[[TWO]]:[[ONE]]], v[[[FOUR]]:[[THREE]]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX942: v_mfma_i32_16x16x32_i8 a[{{[0-9]+:[0-9]+}}], v[[[TWO]]:[[ONE]]], v[[[FOUR]]:[[THREE]]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GISEL: v_mfma_i32_16x16x32_i8 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] @@ -52,12 +52,12 @@ bb: } ; GCN-LABEL: {{^}}test_mfma_i32_32x32x16i8: -; GFX940-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 -; GFX940-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2 -; GFX940-DAG: v_mov_b32_e32 v[[THREE:[0-9]+]], 3 -; GFX940-DAG: v_mov_b32_e32 v[[FOUR:[0-9]+]], 4 +; GFX942-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 +; GFX942-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2 +; GFX942-DAG: v_mov_b32_e32 v[[THREE:[0-9]+]], 3 +; GFX942-DAG: v_mov_b32_e32 v[[FOUR:[0-9]+]], 4 ; GCN-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} -; GFX940: v_mfma_i32_32x32x16_i8 a[{{[0-9]+:[0-9]+}}], v[[[TWO]]:[[ONE]]], v[[[FOUR]]:[[THREE]]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX942: v_mfma_i32_32x32x16_i8 a[{{[0-9]+:[0-9]+}}], v[[[TWO]]:[[ONE]]], v[[[FOUR]]:[[THREE]]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GISEL: v_mfma_i32_32x32x16_i8 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] @@ -70,12 +70,12 @@ bb: } ; GCN-LABEL: {{^}}test_mfma_f32_16x16x32_bf8_bf8: -; GFX940-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 -; GFX940-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2 -; GFX940-DAG: v_mov_b32_e32 v[[THREE:[0-9]+]], 3 -; GFX940-DAG: v_mov_b32_e32 v[[FOUR:[0-9]+]], 4 +; GFX942-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 +; GFX942-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2 +; GFX942-DAG: v_mov_b32_e32 v[[THREE:[0-9]+]], 3 +; GFX942-DAG: v_mov_b32_e32 v[[FOUR:[0-9]+]], 4 ; GCN-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} -; GFX940: v_mfma_f32_16x16x32_bf8_bf8 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[TWO]]:[[ONE]]], v{{\[}}[[FOUR]]:[[THREE]]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX942: v_mfma_f32_16x16x32_bf8_bf8 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[TWO]]:[[ONE]]], v{{\[}}[[FOUR]]:[[THREE]]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GISEL: v_mfma_f32_16x16x32_bf8_bf8 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] @@ -88,12 +88,12 @@ bb: } ; GCN-LABEL: {{^}}test_mfma_f32_16x16x32_bf8_fp8: -; GFX940-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 -; GFX940-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2 -; GFX940-DAG: v_mov_b32_e32 v[[THREE:[0-9]+]], 3 -; GFX940-DAG: v_mov_b32_e32 v[[FOUR:[0-9]+]], 4 +; GFX942-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 +; GFX942-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2 +; GFX942-DAG: v_mov_b32_e32 v[[THREE:[0-9]+]], 3 +; GFX942-DAG: v_mov_b32_e32 v[[FOUR:[0-9]+]], 4 ; GCN-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} -; GFX940: v_mfma_f32_16x16x32_bf8_fp8 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[TWO]]:[[ONE]]], v{{\[}}[[FOUR]]:[[THREE]]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX942: v_mfma_f32_16x16x32_bf8_fp8 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[TWO]]:[[ONE]]], v{{\[}}[[FOUR]]:[[THREE]]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GISEL: v_mfma_f32_16x16x32_bf8_fp8 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] @@ -106,12 +106,12 @@ bb: } ; GCN-LABEL: {{^}}test_mfma_f32_16x16x32_fp8_bf8: -; GFX940-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 -; GFX940-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2 -; GFX940-DAG: v_mov_b32_e32 v[[THREE:[0-9]+]], 3 -; GFX940-DAG: v_mov_b32_e32 v[[FOUR:[0-9]+]], 4 +; GFX942-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 +; GFX942-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2 +; GFX942-DAG: v_mov_b32_e32 v[[THREE:[0-9]+]], 3 +; GFX942-DAG: v_mov_b32_e32 v[[FOUR:[0-9]+]], 4 ; GCN-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} -; GFX940: v_mfma_f32_16x16x32_fp8_bf8 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[TWO]]:[[ONE]]], v{{\[}}[[FOUR]]:[[THREE]]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX942: v_mfma_f32_16x16x32_fp8_bf8 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[TWO]]:[[ONE]]], v{{\[}}[[FOUR]]:[[THREE]]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GISEL: v_mfma_f32_16x16x32_fp8_bf8 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] @@ -124,12 +124,12 @@ bb: } ; GCN-LABEL: {{^}}test_mfma_f32_16x16x32_fp8_fp8: -; GFX940-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 -; GFX940-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2 -; GFX940-DAG: v_mov_b32_e32 v[[THREE:[0-9]+]], 3 -; GFX940-DAG: v_mov_b32_e32 v[[FOUR:[0-9]+]], 4 +; GFX942-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 +; GFX942-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2 +; GFX942-DAG: v_mov_b32_e32 v[[THREE:[0-9]+]], 3 +; GFX942-DAG: v_mov_b32_e32 v[[FOUR:[0-9]+]], 4 ; GCN-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} -; GFX940: v_mfma_f32_16x16x32_fp8_fp8 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[TWO]]:[[ONE]]], v{{\[}}[[FOUR]]:[[THREE]]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX942: v_mfma_f32_16x16x32_fp8_fp8 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[TWO]]:[[ONE]]], v{{\[}}[[FOUR]]:[[THREE]]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GISEL: v_mfma_f32_16x16x32_fp8_fp8 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] @@ -142,12 +142,12 @@ bb: } ; GCN-LABEL: {{^}}test_mfma_f32_32x32x16_bf8_bf8: -; GFX940-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 -; GFX940-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2 -; GFX940-DAG: v_mov_b32_e32 v[[THREE:[0-9]+]], 3 -; GFX940-DAG: v_mov_b32_e32 v[[FOUR:[0-9]+]], 4 +; GFX942-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 +; GFX942-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2 +; GFX942-DAG: v_mov_b32_e32 v[[THREE:[0-9]+]], 3 +; GFX942-DAG: v_mov_b32_e32 v[[FOUR:[0-9]+]], 4 ; GCN-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} -; GFX940: v_mfma_f32_32x32x16_bf8_bf8 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[TWO]]:[[ONE]]], v{{\[}}[[FOUR]]:[[THREE]]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX942: v_mfma_f32_32x32x16_bf8_bf8 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[TWO]]:[[ONE]]], v{{\[}}[[FOUR]]:[[THREE]]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GISEL: v_mfma_f32_32x32x16_bf8_bf8 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] @@ -160,12 +160,12 @@ bb: } ; GCN-LABEL: {{^}}test_mfma_f32_32x32x16_bf8_fp8: -; GFX940-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 -; GFX940-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2 -; GFX940-DAG: v_mov_b32_e32 v[[THREE:[0-9]+]], 3 -; GFX940-DAG: v_mov_b32_e32 v[[FOUR:[0-9]+]], 4 +; GFX942-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 +; GFX942-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2 +; GFX942-DAG: v_mov_b32_e32 v[[THREE:[0-9]+]], 3 +; GFX942-DAG: v_mov_b32_e32 v[[FOUR:[0-9]+]], 4 ; GCN-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} -; GFX940: v_mfma_f32_32x32x16_bf8_fp8 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[TWO]]:[[ONE]]], v{{\[}}[[FOUR]]:[[THREE]]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX942: v_mfma_f32_32x32x16_bf8_fp8 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[TWO]]:[[ONE]]], v{{\[}}[[FOUR]]:[[THREE]]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GISEL: v_mfma_f32_32x32x16_bf8_fp8 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] @@ -178,12 +178,12 @@ bb: } ; GCN-LABEL: {{^}}test_mfma_f32_32x32x16_fp8_bf8: -; GFX940-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 -; GFX940-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2 -; GFX940-DAG: v_mov_b32_e32 v[[THREE:[0-9]+]], 3 -; GFX940-DAG: v_mov_b32_e32 v[[FOUR:[0-9]+]], 4 +; GFX942-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 +; GFX942-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2 +; GFX942-DAG: v_mov_b32_e32 v[[THREE:[0-9]+]], 3 +; GFX942-DAG: v_mov_b32_e32 v[[FOUR:[0-9]+]], 4 ; GCN-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} -; GFX940: v_mfma_f32_32x32x16_fp8_bf8 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[TWO]]:[[ONE]]], v{{\[}}[[FOUR]]:[[THREE]]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX942: v_mfma_f32_32x32x16_fp8_bf8 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[TWO]]:[[ONE]]], v{{\[}}[[FOUR]]:[[THREE]]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GISEL: v_mfma_f32_32x32x16_fp8_bf8 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] @@ -196,12 +196,12 @@ bb: } ; GCN-LABEL: {{^}}test_mfma_f32_32x32x16_fp8_fp8: -; GFX940-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 -; GFX940-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2 -; GFX940-DAG: v_mov_b32_e32 v[[THREE:[0-9]+]], 3 -; GFX940-DAG: v_mov_b32_e32 v[[FOUR:[0-9]+]], 4 +; GFX942-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1 +; GFX942-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2 +; GFX942-DAG: v_mov_b32_e32 v[[THREE:[0-9]+]], 3 +; GFX942-DAG: v_mov_b32_e32 v[[FOUR:[0-9]+]], 4 ; GCN-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} -; GFX940: v_mfma_f32_32x32x16_fp8_fp8 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[TWO]]:[[ONE]]], v{{\[}}[[FOUR]]:[[THREE]]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX942: v_mfma_f32_32x32x16_fp8_fp8 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[TWO]]:[[ONE]]], v{{\[}}[[FOUR]]:[[THREE]]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GISEL: v_mfma_f32_32x32x16_fp8_fp8 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll index 54023770ed0ff..8732c77778b01 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll @@ -1,7 +1,7 @@ ; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,NOLIT-SRCC,GFX908,GFX908_A %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -mattr=-mfma-inline-literal-bug -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,LIT-SRCC,GFX908,GFX908_A %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX90A,GFX908_A,GFX90A_40 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX940,GFX90A_40 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX90A,GFX908_A,GFX90A_42 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX942,GFX90A_42 %s declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32) declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float, float, <16 x float>, i32, i32, i32) @@ -55,9 +55,9 @@ declare i32 @llvm.amdgcn.workitem.id.x() ; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} ; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} ; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A_40-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} +; GFX90A_42-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX908_A: v_mfma_f32_32x32x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 -; GFX940: v_mfma_f32_32x32x1_2b_f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX942: v_mfma_f32_32x32x1_2b_f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX908-COUNT-4: v_accvgpr_read_b32 ; GFX908-COUNT-2: global_store_dwordx4 v{{[0-9]+}}, v[{{[0-9:]+}}] ; GFX908-COUNT-4: v_accvgpr_read_b32 @@ -81,9 +81,9 @@ bb: ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0 ; GCN-DAG: s_load_dwordx16 ; GFX908-DAG-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A_40-COUNT-16:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} +; GFX90A_42-COUNT-16:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX908_A: v_mfma_f32_16x16x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 -; GFX940: v_mfma_f32_16x16x1_4b_f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX942: v_mfma_f32_16x16x1_4b_f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX908-COUNT: v_accvgpr_read_b32 ; GFX908-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, v[{{[0-9:]+}}] ; GFX90A-NOT: v_accvgpr_read_b32 @@ -101,9 +101,9 @@ bb: ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0 ; GCN: s_load_dwordx4 ; GFX908-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A_40-COUNT-4:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} +; GFX90A_42-COUNT-4:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX908_A: v_mfma_f32_4x4x1f32 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 -; GFX940: v_mfma_f32_4x4x1_16b_f32 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX942: v_mfma_f32_4x4x1_16b_f32 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX908-COUNT-4: v_accvgpr_read_b32 ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 @@ -121,9 +121,9 @@ bb: ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0 ; GCN-DAG: s_load_dwordx16 ; GFX908-DAG-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A_40-COUNT-16:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} +; GFX90A_42-COUNT-16:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX908_A: v_mfma_f32_32x32x2f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 -; GFX940: v_mfma_f32_32x32x2_f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX942: v_mfma_f32_32x32x2_f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX908-COUNT-16: v_accvgpr_read_b32 ; GFX908-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, v[{{[0-9:]+}}] ; GFX90A-NOT: v_accvgpr_read_b32 @@ -141,9 +141,9 @@ bb: ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0 ; GCN: s_load_dwordx4 ; GFX908-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A_40-COUNT-4:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} +; GFX90A_42-COUNT-4:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX908_A: v_mfma_f32_16x16x4f32 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 -; GFX940: v_mfma_f32_16x16x4_f32 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX942: v_mfma_f32_16x16x4_f32 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX908-COUNT-4: v_accvgpr_read_b32 ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 @@ -160,9 +160,9 @@ bb: ; GCN-DAG: s_load_dwordx16 ; GCN-DAG: s_load_dwordx16 ; GFX908-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A_40-COUNT-32:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} +; GFX90A_42-COUNT-32:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX908_A: v_mfma_f32_32x32x4f16 a[{{[0-9]+:[0-9]+}}], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 -; GFX940: v_mfma_f32_32x32x4_2b_f16 a[{{[0-9]+:[0-9]+}}], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX942: v_mfma_f32_32x32x4_2b_f16 a[{{[0-9]+:[0-9]+}}], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX908-COUNT-32: v_accvgpr_read_b32 ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 @@ -181,9 +181,9 @@ bb: ; GCN-LABEL: {{^}}test_mfma_f32_16x16x4f16: ; GCN: s_load_dwordx16 ; GFX908-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A_40-COUNT-16:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} +; GFX90A_42-COUNT-16:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX908_A: v_mfma_f32_16x16x4f16 a[{{[0-9]+:[0-9]+}}], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 -; GFX940: v_mfma_f32_16x16x4_4b_f16 a[{{[0-9]+:[0-9]+}}], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX942: v_mfma_f32_16x16x4_4b_f16 a[{{[0-9]+:[0-9]+}}], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX908-COUNT-16: v_accvgpr_read_b32 ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 @@ -203,9 +203,9 @@ bb: ; GCN: s_load_dwordx4 ; GCN: s_load_dwordx4 ; GFX908-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A_40-COUNT-4:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} +; GFX90A_42-COUNT-4:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX908_A: v_mfma_f32_4x4x4f16 [[RES:a\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 -; GFX940: v_mfma_f32_4x4x4_16b_f16 [[RES:a\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX942: v_mfma_f32_4x4x4_16b_f16 [[RES:a\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX908-COUNT-4: v_accvgpr_read_b32 ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 @@ -226,9 +226,9 @@ bb: ; GCN: s_waitcnt lgkmcnt(0) ; GFX908_A: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}} ; GFX908-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A_40-COUNT-16:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} +; GFX90A_42-COUNT-16:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX908_A: v_mfma_f32_32x32x8f16 a[{{[0-9]+:[0-9]+}}], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 -; GFX940: v_mfma_f32_32x32x8_f16 a[{{[0-9]+:[0-9]+}}], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX942: v_mfma_f32_32x32x8_f16 a[{{[0-9]+:[0-9]+}}], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX908-COUNT-16: v_accvgpr_read_b32 ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 @@ -248,9 +248,9 @@ bb: ; GCN: s_load_dwordx4 ; GCN: s_load_dwordx4 ; GFX908-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A_40-COUNT-4:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} +; GFX90A_42-COUNT-4:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX908_A: v_mfma_f32_16x16x16f16 [[RES:a\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 -; GFX940: v_mfma_f32_16x16x16_f16 [[RES:a\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX942: v_mfma_f32_16x16x16_f16 [[RES:a\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX908-COUNT-4: v_accvgpr_read_b32 ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 @@ -303,9 +303,9 @@ bb: ; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} ; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} ; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A_40-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} +; GFX90A_42-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX908_A: v_mfma_i32_32x32x4i8 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 -; GFX940: v_mfma_i32_32x32x4_2b_i8 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX942: v_mfma_i32_32x32x4_2b_i8 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX908-COUNT-32: v_accvgpr_read_b32 ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 @@ -323,9 +323,9 @@ bb: ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1 ; GCN-DAG: s_load_dwordx16 ; GFX908-DAG-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A_40-COUNT-16:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} +; GFX90A_42-COUNT-16:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX908_A: v_mfma_i32_16x16x4i8 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 -; GFX940: v_mfma_i32_16x16x4_4b_i8 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX942: v_mfma_i32_16x16x4_4b_i8 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX908-COUNT-16: v_accvgpr_read_b32 ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 @@ -343,9 +343,9 @@ bb: ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1 ; GCN: s_load_dwordx4 ; GFX908-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A_40-COUNT-4:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} +; GFX90A_42-COUNT-4:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} ; GFX908_A: v_mfma_i32_4x4x4i8 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 -; GFX940: v_mfma_i32_4x4x4_16b_i8 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX942: v_mfma_i32_4x4x4_16b_i8 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX908-COUNT-4: v_accvgpr_read_b32 ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 @@ -361,8 +361,8 @@ bb: ; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_forward_acc: ; GFX908_A: v_mfma_f32_32x32x1f32 [[MAI1:a\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v{{[0-9]+}}, a[{{[0-9]+:[0-9]+}}] ; GFX908_A-NEXT: v_mfma_f32_32x32x1f32 a[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v{{[0-9]+}}, [[MAI1]] -; GFX940: v_mfma_f32_32x32x1_2b_f32 [[MAI1:a\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v{{[0-9]+}}, a[{{[0-9]+:[0-9]+}}] -; GFX940-NEXT: v_mfma_f32_32x32x1_2b_f32 a[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v{{[0-9]+}}, [[MAI1]] +; GFX942: v_mfma_f32_32x32x1_2b_f32 [[MAI1:a\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v{{[0-9]+}}, a[{{[0-9]+:[0-9]+}}] +; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v{{[0-9]+}}, [[MAI1]] define amdgpu_kernel void @test_mfma_f32_32x32x1f32_forward_acc(ptr addrspace(1) %arg) #0 { bb: %in.1 = load <32 x float>, ptr addrspace(1) %arg @@ -375,8 +375,8 @@ bb: ; GCN-LABEL: {{^}}test_mfma_f32_16x16x1f32_forward_acc: ; GFX908_A: v_mfma_f32_16x16x1f32 [[MAI1:a\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v{{[0-9]+}}, a[{{[0-9]+:[0-9]+}}] ; GFX908_A-NEXT: v_mfma_f32_16x16x1f32 a[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v{{[0-9]+}}, [[MAI1]] -; GFX940: v_mfma_f32_16x16x1_4b_f32 [[MAI1:a\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v{{[0-9]+}}, a[{{[0-9]+:[0-9]+}}] -; GFX940-NEXT: v_mfma_f32_16x16x1_4b_f32 a[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v{{[0-9]+}}, [[MAI1]] +; GFX942: v_mfma_f32_16x16x1_4b_f32 [[MAI1:a\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v{{[0-9]+}}, a[{{[0-9]+:[0-9]+}}] +; GFX942-NEXT: v_mfma_f32_16x16x1_4b_f32 a[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v{{[0-9]+}}, [[MAI1]] define amdgpu_kernel void @test_mfma_f32_16x16x1f32_forward_acc(ptr addrspace(1) %arg) #0 { bb: %in.1 = load <16 x float>, ptr addrspace(1) %arg @@ -389,9 +389,9 @@ bb: ; GCN-LABEL: {{^}}test_mfma_f32_4x4x1f32_forward_acc: ; GFX908_A: v_mfma_f32_4x4x1f32 [[MAI1:a\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v{{[0-9]+}}, a[{{[0-9]+:[0-9]+}}] ; GFX908_A-NEXT: v_mfma_f32_4x4x1f32 a[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v{{[0-9]+}}, [[MAI1]] -; GFX940: v_mfma_f32_4x4x1_16b_f32 [[MAI1:a\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v{{[0-9]+}}, a[{{[0-9]+:[0-9]+}}] -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mfma_f32_4x4x1_16b_f32 a[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v{{[0-9]+}}, [[MAI1]] +; GFX942: v_mfma_f32_4x4x1_16b_f32 [[MAI1:a\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v{{[0-9]+}}, a[{{[0-9]+:[0-9]+}}] +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mfma_f32_4x4x1_16b_f32 a[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v{{[0-9]+}}, [[MAI1]] define amdgpu_kernel void @test_mfma_f32_4x4x1f32_forward_acc(ptr addrspace(1) %arg) #0 { bb: %in.1 = load <4 x float>, ptr addrspace(1) %arg @@ -411,7 +411,7 @@ bb: ; NOLIT-SRCC: v_mfma_f32_4x4x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9:]+}}] ; LIT-SRCC: v_mfma_f32_4x4x1f32 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], 1.0 ; GFX90A: v_mfma_f32_4x4x1f32 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], 1.0 -; GFX940: v_mfma_f32_4x4x1_16b_f32 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], 1.0 +; GFX942: v_mfma_f32_4x4x1_16b_f32 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], 1.0 ; GFX908-COUNT-4: v_accvgpr_read_b32 ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 @@ -430,7 +430,7 @@ bb: ; NOLIT-SRCC: v_mfma_f32_16x16x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9:]+}}] ; LIT-SRCC: v_mfma_f32_16x16x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], 1.0 ; GFX90A: v_mfma_f32_16x16x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], 1.0 -; GFX940: v_mfma_f32_16x16x1_4b_f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], 1.0 +; GFX942: v_mfma_f32_16x16x1_4b_f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], 1.0 ; GFX908-COUNT-16: v_accvgpr_read_b32 ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 @@ -449,7 +449,7 @@ bb: ; NOLIT-SRCC: v_mfma_f32_32x32x8f16 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9:]+}}] ; LIT-SRCC: v_mfma_f32_32x32x8f16 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], 1.0 ; GFX90A: v_mfma_f32_32x32x8f16 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], 1.0 -; GFX940: v_mfma_f32_32x32x8_f16 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], 1.0 +; GFX942: v_mfma_f32_32x32x8_f16 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], 1.0 ; GFX908-COUNT-16: v_accvgpr_read_b32 ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 @@ -468,7 +468,7 @@ bb: ; NOLIT-SRCC: v_mfma_f32_32x32x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9:]+}}] ; LIT-SRCC: v_mfma_f32_32x32x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], 0 ; GFX90A: v_mfma_f32_32x32x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], 0 -; GFX940: v_mfma_f32_32x32x1_2b_f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], 0 +; GFX942: v_mfma_f32_32x32x1_2b_f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], 0 ; GFX908-COUNT-32: v_accvgpr_read_b32 ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 @@ -488,7 +488,7 @@ bb: ; GFX90A-DAG: v_accvgpr_mov_b32 a{{[0-9]+}}, a{{[0-9]+}} ; GFX90A-DAG: v_accvgpr_mov_b32 a{{[0-9]+}}, a{{[0-9]+}} ; GFX908_A: v_mfma_f32_4x4x1f32 [[RES:a\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, {{v[0-9]+}}, a[{{[0-9]+:[0-9]+}}] -; GFX940: v_mfma_f32_4x4x1_16b_f32 [[RES:a\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, {{v[0-9]+}}, a[{{[0-9]+:[0-9]+}}] +; GFX942: v_mfma_f32_4x4x1_16b_f32 [[RES:a\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, {{v[0-9]+}}, a[{{[0-9]+:[0-9]+}}] ; GFX908-COUNT-4: v_accvgpr_read_b32 ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 @@ -506,7 +506,7 @@ bb: ; GFX908-COUNT-14: v_accvgpr_write_b32 a{{[0-9]+}}, 1.0 ; GFX90A-COUNT-14: v_accvgpr_mov_b32 a{{[0-9]+}}, a{{[0-9]+}} ; GFX908_A: v_mfma_f32_16x16x1f32 a[{{[0-9]+:[0-9]+}}], {{v[0-9]+}}, {{v[0-9]+}}, a[{{[0-9]+:[0-9]+}}] -; GFX940: v_mfma_f32_16x16x1_4b_f32 a[{{[0-9]+:[0-9]+}}], {{v[0-9]+}}, {{v[0-9]+}}, a[{{[0-9]+:[0-9]+}}] +; GFX942: v_mfma_f32_16x16x1_4b_f32 a[{{[0-9]+:[0-9]+}}], {{v[0-9]+}}, {{v[0-9]+}}, a[{{[0-9]+:[0-9]+}}] ; GFX908-COUNT-16: v_accvgpr_read_b32 ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 @@ -582,7 +582,7 @@ bb: ; GFX90A-DAG: v_accvgpr_mov_b32 a{{[0-9]+}}, a{{[0-9]+}} ; GFX90A-DAG: v_accvgpr_mov_b32 a{{[0-9]+}}, a{{[0-9]+}} ; GFX908_A: v_mfma_f32_32x32x1f32 a[{{[0-9]+:[0-9]+}}], {{v[0-9]+}}, {{v[0-9]+}}, a[{{[0-9]+:[0-9]+}}] -; GFX940: v_mfma_f32_32x32x1_2b_f32 a[{{[0-9]+:[0-9]+}}], {{v[0-9]+}}, {{v[0-9]+}}, a[{{[0-9]+:[0-9]+}}] +; GFX942: v_mfma_f32_32x32x1_2b_f32 a[{{[0-9]+:[0-9]+}}], {{v[0-9]+}}, {{v[0-9]+}}, a[{{[0-9]+:[0-9]+}}] ; GFX908-COUNT-32: v_accvgpr_read_b32 ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 @@ -604,7 +604,7 @@ bb: ; GFX90A: v_accvgpr_mov_b32 a{{[0-9]+}}, [[TTMPA]] ; GFX90A: v_accvgpr_mov_b32 a{{[0-9]+}}, [[TTMPA]] ; GFX908_A: v_mfma_f32_4x4x1f32 [[RES:a\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, {{v[0-9]+}}, a[{{[0-9]+:[0-9]+}}] -; GFX940: v_mfma_f32_4x4x1_16b_f32 [[RES:a\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, {{v[0-9]+}}, a[{{[0-9]+:[0-9]+}}] +; GFX942: v_mfma_f32_4x4x1_16b_f32 [[RES:a\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, {{v[0-9]+}}, a[{{[0-9]+:[0-9]+}}] ; GFX908-COUNT-4: v_accvgpr_read_b32 ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 @@ -622,16 +622,16 @@ bb: ; GCN-LABEL: {{^}}test_mfma_f32_4x4x1f32_lit_splat_bad_code: ; GCN: v_mov_b32_e32 [[TMP0:v[0-9]+]], 0x42f60000 ; GCN: v_accvgpr_write_b32 [[AGPR:a[0-9]+]], [[TMP0]] -; GFX90A_40-COUNT-3: v_accvgpr_mov_b32 a{{[0-9]+}}, [[AGPR]] +; GFX90A_42-COUNT-3: v_accvgpr_mov_b32 a{{[0-9]+}}, [[AGPR]] ; GFX908-NEXT: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP0]] ; GFX908-NEXT: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP0]] ; GFX908-NEXT: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP0]] ; GCN: s_nop 0 ; GFX908_A: v_mfma_f32_4x4x1f32 a[{{[0-9]+:[0-9]+}}], {{v[0-9]+}}, {{v[0-9]+}}, a[{{[0-9]+:[0-9]+}}] -; GFX940: v_mfma_f32_4x4x1_16b_f32 a[{{[0-9]+:[0-9]+}}], {{v[0-9]+}}, {{v[0-9]+}}, a[{{[0-9]+:[0-9]+}}] +; GFX942: v_mfma_f32_4x4x1_16b_f32 a[{{[0-9]+:[0-9]+}}], {{v[0-9]+}}, {{v[0-9]+}}, a[{{[0-9]+:[0-9]+}}] ; GFX908-COUNT-4: v_accvgpr_read_b32 ; GFX908: global_store_dwordx4 v{{[0-9]+}}, v[{{[0-9:]+}}], s[{{[0-9:]+}}] -; GFX90A_40: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}], s[{{[0-9:]+}}] +; GFX90A_42: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}], s[{{[0-9:]+}}] define amdgpu_kernel void @test_mfma_f32_4x4x1f32_lit_splat_bad_code(ptr addrspace(1) %arg) #0 { bb: %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -643,20 +643,20 @@ bb: } ; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_vecarg: -; GFX90A_40-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0 -; GFX90A_40-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0 +; GFX90A_42-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0 +; GFX90A_42-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0 ; GCN-COUNT-8: global_load_dwordx4 ; GFX908-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} -; GFX90A_40-NOT: v_accvgpr_write +; GFX90A_42-NOT: v_accvgpr_write ; GFX908-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0 ; GFX908-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0 ; GFX908: v_mfma_f32_32x32x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX90A: v_mfma_f32_32x32x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 -; GFX940: v_mfma_f32_32x32x1_2b_f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX942: v_mfma_f32_32x32x1_2b_f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GFX908: v_accvgpr_read_b32 ; GFX908-COUNT-8: global_store_dwordx4 -; GFX90A_40-NOT: v_accvgpr_read_b32 -; GFX90A_40-COUNT-5: global_store_dwordx4 v{{[0-9:]+}}, a[{{[0-9:]+}}], s[{{[0-9:]+}}] +; GFX90A_42-NOT: v_accvgpr_read_b32 +; GFX90A_42-COUNT-5: global_store_dwordx4 v{{[0-9:]+}}, a[{{[0-9:]+}}], s[{{[0-9:]+}}] define amdgpu_kernel void @test_mfma_f32_32x32x1f32_vecarg(ptr addrspace(1) %arg) #0 { bb: %tid = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.xf32.gfx940.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.xf32.gfx942.ll similarity index 79% rename from llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.xf32.gfx940.ll rename to llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.xf32.gfx942.ll index 0ee1ecfaffb15..c98929c7d89aa 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.xf32.gfx940.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.xf32.gfx942.ll @@ -1,18 +1,18 @@ -; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX940 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX942 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -global-isel -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GISEL %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stress-regalloc=10 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX940 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stress-regalloc=10 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX942 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stress-regalloc=10 -global-isel -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GISEL %s declare <4 x float> @llvm.amdgcn.mfma.f32.16x16x8.xf32(<2 x float>, <2 x float>, <4 x float>, i32, i32, i32) declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x4.xf32(<2 x float>, <2 x float>, <16 x float>, i32, i32, i32) ; GCN-LABEL: {{^}}test_mfma_f32_16x16x8xf32: -; GFX940-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1.0 -; GFX940-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2.0 -; GFX940-DAG: v_mov_b32_e32 v[[THREE:[0-9]+]], 0x40400000 -; GFX940-DAG: v_mov_b32_e32 v[[FOUR:[0-9]+]], 4.0 +; GFX942-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1.0 +; GFX942-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2.0 +; GFX942-DAG: v_mov_b32_e32 v[[THREE:[0-9]+]], 0x40400000 +; GFX942-DAG: v_mov_b32_e32 v[[FOUR:[0-9]+]], 4.0 ; GCN-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} -; GFX940: v_mfma_f32_16x16x8_xf32 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:[[TWO]]], v[[[THREE]]:[[FOUR]]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX942: v_mfma_f32_16x16x8_xf32 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:[[TWO]]], v[[[THREE]]:[[FOUR]]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GISEL: v_mfma_f32_16x16x8_xf32 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] @@ -25,12 +25,12 @@ bb: } ; GCN-LABEL: {{^}}test_mfma_f32_32x32x4xf32: -; GFX940-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1.0 -; GFX940-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2.0 -; GFX940-DAG: v_mov_b32_e32 v[[THREE:[0-9]+]], 0x40400000 -; GFX940-DAG: v_mov_b32_e32 v[[FOUR:[0-9]+]], 4.0 +; GFX942-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1.0 +; GFX942-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2.0 +; GFX942-DAG: v_mov_b32_e32 v[[THREE:[0-9]+]], 0x40400000 +; GFX942-DAG: v_mov_b32_e32 v[[FOUR:[0-9]+]], 4.0 ; GCN-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}} -; GFX940: v_mfma_f32_32x32x4_xf32 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:[[TWO]]], v[[[THREE]]:[[FOUR]]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 +; GFX942: v_mfma_f32_32x32x4_xf32 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:[[TWO]]], v[[[THREE]]:[[FOUR]]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GISEL: v_mfma_f32_32x32x4_xf32 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane16.swap.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane16.swap.ll index e1cebe28f7fe8..cba53175087ec 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane16.swap.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane16.swap.ll @@ -2,8 +2,8 @@ ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefix=GCN %s ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefix=GCN %s -; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR-SDAG %s -; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR-GISEL %s +; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR-SDAG %s +; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR-GISEL %s ; ERR-SDAG: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.permlane16.swap ; ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32), %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.permlane16.swap) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane32.swap.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane32.swap.ll index 121c379053fcf..963c73d61535b 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane32.swap.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane32.swap.ll @@ -2,8 +2,8 @@ ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefix=GCN %s ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefix=GCN %s -; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR-SDAG %s -; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR-GISEL %s +; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR-SDAG %s +; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR-GISEL %s ; ERR-SDAG: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.permlane32.swap ; ERR-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32), %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.permlane32.swap) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ptr.buffer.atomic.fadd_rtn_errors.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ptr.buffer.atomic.fadd_rtn_errors.ll index 09cc55b53539b..a0f03d020b989 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ptr.buffer.atomic.fadd_rtn_errors.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ptr.buffer.atomic.fadd_rtn_errors.ll @@ -8,8 +8,8 @@ ; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=null %t/struct-ret-v2bf16-error.ll 2>&1 | FileCheck -check-prefix=ERR-STRUCT-V2BF16-SDAG %s ; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -filetype=null %t/raw-ret-v2bf16-error.ll 2>&1 | FileCheck -check-prefix=ERR-RAW-V2BF16-SDAG %s ; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -filetype=null %t/struct-ret-v2bf16-error.ll 2>&1 | FileCheck -check-prefix=ERR-STRUCT-V2BF16-SDAG %s -; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=null %t/raw-ret-v2bf16-error.ll 2>&1 | FileCheck -check-prefix=ERR-RAW-V2BF16-SDAG %s -; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=null %t/struct-ret-v2bf16-error.ll 2>&1 | FileCheck -check-prefix=ERR-STRUCT-V2BF16-SDAG %s +; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -filetype=null %t/raw-ret-v2bf16-error.ll 2>&1 | FileCheck -check-prefix=ERR-RAW-V2BF16-SDAG %s +; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -filetype=null %t/struct-ret-v2bf16-error.ll 2>&1 | FileCheck -check-prefix=ERR-STRUCT-V2BF16-SDAG %s ; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=null %t/raw-ret-f32-error.ll 2>&1 | FileCheck -check-prefix=ERR-RAW-F32-GISEL %s ; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=null %t/struct-ret-f32-error.ll 2>&1 | FileCheck -check-prefix=ERR-STRUCT-F32-GISEL %s @@ -19,8 +19,8 @@ ; RUN: not --crash llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=null %t/struct-ret-v2bf16-error.ll 2>&1 | FileCheck -check-prefix=ERR-STRUCT-V2BF16-GISEL %s ; RUN: not --crash llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -filetype=null %t/raw-ret-v2bf16-error.ll 2>&1 | FileCheck -check-prefix=ERR-RAW-V2BF16-GISEL %s ; RUN: not --crash llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -filetype=null %t/struct-ret-v2bf16-error.ll 2>&1 | FileCheck -check-prefix=ERR-STRUCT-V2BF16-GISEL %s -; RUN: not --crash llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=null %t/raw-ret-v2bf16-error.ll 2>&1 | FileCheck -check-prefix=ERR-RAW-V2BF16-GISEL %s -; RUN: not --crash llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=null %t/struct-ret-v2bf16-error.ll 2>&1 | FileCheck -check-prefix=ERR-STRUCT-V2BF16-GISEL %s +; RUN: not --crash llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -filetype=null %t/raw-ret-v2bf16-error.ll 2>&1 | FileCheck -check-prefix=ERR-RAW-V2BF16-GISEL %s +; RUN: not --crash llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -filetype=null %t/struct-ret-v2bf16-error.ll 2>&1 | FileCheck -check-prefix=ERR-STRUCT-V2BF16-GISEL %s ; Make sure buffer fadd atomics with return values are not selected ; for gfx908 where they do not work. diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.fadd_nortn.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.fadd_nortn.ll index 206cc9f2ec28d..8141e0df46737 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.fadd_nortn.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.fadd_nortn.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 < %s | FileCheck -check-prefix=GFX908 %s ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s +; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s define void @raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) #0 { @@ -19,12 +19,12 @@ define void @raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voff ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], s16 offen sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], s16 offen sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset: ; GFX12: ; %bb.0: @@ -54,12 +54,12 @@ define void @raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_atomic_add_f32 v0, off, s[0:3], s16 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_atomic_add_f32 v0, off, s[0:3], s16 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset: ; GFX12: ; %bb.0: @@ -89,12 +89,12 @@ define void @raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_vo ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], s16 offen -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], s16 offen +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset: ; GFX12: ; %bb.0: @@ -124,12 +124,12 @@ define void @raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffs ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_atomic_pk_add_f16 v0, off, s[0:3], s16 offset:92 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_atomic_pk_add_f16 v0, off, s[0:3], s16 offset:92 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset: ; GFX12: ; %bb.0: @@ -159,12 +159,12 @@ define void @raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voff ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], s16 offen nt -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], s16 offen nt +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc: ; GFX12: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.fadd_rtn.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.fadd_rtn.ll index 4a87ca8ad42fd..767117dc99fd4 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.fadd_rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.fadd_rtn.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s +; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s define float @raw_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) #0 { @@ -11,12 +11,12 @@ define float @raw_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__vgpr_voffs ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: raw_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], s16 offen sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: raw_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], s16 offen sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: raw_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset: ; GFX12: ; %bb.0: @@ -40,12 +40,12 @@ define float @raw_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__0_voffset_ ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: raw_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_atomic_add_f32 v0, off, s[0:3], s16 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: raw_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_atomic_add_f32 v0, off, s[0:3], s16 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: raw_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset: ; GFX12: ; %bb.0: @@ -69,12 +69,12 @@ define <2 x half> @raw_ptr_buffer_atomic_add_v2f16_rtn__vgpr_val__sgpr_rsrc__vgp ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: raw_ptr_buffer_atomic_add_v2f16_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], s16 offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: raw_ptr_buffer_atomic_add_v2f16_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], s16 offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: raw_ptr_buffer_atomic_add_v2f16_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset: ; GFX12: ; %bb.0: @@ -98,12 +98,12 @@ define <2 x half> @raw_ptr_buffer_atomic_add_v2f16_rtn__vgpr_val__sgpr_rsrc__0_v ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: raw_ptr_buffer_atomic_add_v2f16_rtn__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_atomic_pk_add_f16 v0, off, s[0:3], s16 offset:92 sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: raw_ptr_buffer_atomic_add_v2f16_rtn__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_atomic_pk_add_f16 v0, off, s[0:3], s16 offset:92 sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: raw_ptr_buffer_atomic_add_v2f16_rtn__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset: ; GFX12: ; %bb.0: @@ -127,12 +127,12 @@ define float @raw_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__vgpr_voffs ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: raw_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], s16 offen sc0 nt -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: raw_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], s16 offen sc0 nt +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: raw_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc: ; GFX12: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.lds.gfx950.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.lds.gfx950.ll index 58b1d0da4a5f3..476a9e9c8cd69 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.lds.gfx950.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.lds.gfx950.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-SDAG %s ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-GISEL %s -; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx940 -filetype=null < %s 2>&1 | FileCheck -check-prefix=ERR-SDAG %s -; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx940 -filetype=null < %s 2>&1 | FileCheck -check-prefix=ERR-GISEL %s +; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx942 -filetype=null < %s 2>&1 | FileCheck -check-prefix=ERR-SDAG %s +; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx942 -filetype=null < %s 2>&1 | FileCheck -check-prefix=ERR-GISEL %s ; FIXME: Not a great error ; ERR-SDAG: LLVM ERROR: Do not know how to expand this operator's operand! diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fadd_nortn.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fadd_nortn.ll index 91217c219c451..c2a0028f4f1f1 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fadd_nortn.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fadd_nortn.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 < %s | FileCheck -check-prefix=GFX908 %s ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s +; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX1200 %s define void @struct_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) #0 { @@ -21,14 +21,14 @@ define void @struct_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_v ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: struct_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: buffer_atomic_add_f32 v0, v[2:3], s[0:3], s16 idxen offen -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: struct_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: buffer_atomic_add_f32 v0, v[2:3], s[0:3], s16 idxen offen +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX1200-LABEL: struct_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset: ; GFX1200: ; %bb.0: @@ -59,12 +59,12 @@ define void @struct_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voff ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: struct_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], s16 idxen -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: struct_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], s16 idxen +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX1200-LABEL: struct_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset: ; GFX1200: ; %bb.0: @@ -96,14 +96,14 @@ define void @struct_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_v ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: struct_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: buffer_atomic_add_f32 v0, v[2:3], s[0:3], s16 idxen offen nt -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: struct_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: buffer_atomic_add_f32 v0, v[2:3], s[0:3], s16 idxen offen nt +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX1200-LABEL: struct_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc: ; GFX1200: ; %bb.0: @@ -135,14 +135,14 @@ define void @struct_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: struct_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: buffer_atomic_pk_add_f16 v0, v[2:3], s[0:3], s16 idxen offen -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: struct_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: buffer_atomic_pk_add_f16 v0, v[2:3], s[0:3], s16 idxen offen +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX1200-LABEL: struct_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset: ; GFX1200: ; %bb.0: @@ -223,39 +223,39 @@ define void @struct_ptr_buffer_atomic_add_f32_noret__vgpr_val__vgpr_rsrc__vgpr_v ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: struct_ptr_buffer_atomic_add_f32_noret__vgpr_val__vgpr_rsrc__vgpr_voffset__vgpr_soffset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v9, v6 -; GFX940-NEXT: v_mov_b32_e32 v8, v5 -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: s_mov_b64 s[2:3], exec -; GFX940-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: v_readfirstlane_b32 s4, v2 -; GFX940-NEXT: v_readfirstlane_b32 s5, v3 -; GFX940-NEXT: v_readfirstlane_b32 s6, v4 -; GFX940-NEXT: v_readfirstlane_b32 s7, v5 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[2:3] -; GFX940-NEXT: v_readfirstlane_b32 s8, v7 -; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[4:5] -; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, s8, v7 -; GFX940-NEXT: s_and_b64 s[0:1], s[0:1], vcc -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: buffer_atomic_add_f32 v0, v[8:9], s[4:7], s8 idxen offen -; GFX940-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5 -; GFX940-NEXT: ; implicit-def: $vgpr7 -; GFX940-NEXT: ; implicit-def: $vgpr0 -; GFX940-NEXT: ; implicit-def: $vgpr8_vgpr9 -; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB4_1 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: s_mov_b64 exec, s[2:3] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: struct_ptr_buffer_atomic_add_f32_noret__vgpr_val__vgpr_rsrc__vgpr_voffset__vgpr_soffset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v9, v6 +; GFX942-NEXT: v_mov_b32_e32 v8, v5 +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: s_mov_b64 s[2:3], exec +; GFX942-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: v_readfirstlane_b32 s4, v2 +; GFX942-NEXT: v_readfirstlane_b32 s5, v3 +; GFX942-NEXT: v_readfirstlane_b32 s6, v4 +; GFX942-NEXT: v_readfirstlane_b32 s7, v5 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[2:3] +; GFX942-NEXT: v_readfirstlane_b32 s8, v7 +; GFX942-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[4:5] +; GFX942-NEXT: s_and_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, s8, v7 +; GFX942-NEXT: s_and_b64 s[0:1], s[0:1], vcc +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: buffer_atomic_add_f32 v0, v[8:9], s[4:7], s8 idxen offen +; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5 +; GFX942-NEXT: ; implicit-def: $vgpr7 +; GFX942-NEXT: ; implicit-def: $vgpr0 +; GFX942-NEXT: ; implicit-def: $vgpr8_vgpr9 +; GFX942-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB4_1 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: s_mov_b64 exec, s[2:3] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX1200-LABEL: struct_ptr_buffer_atomic_add_f32_noret__vgpr_val__vgpr_rsrc__vgpr_voffset__vgpr_soffset: ; GFX1200: ; %bb.0: @@ -362,39 +362,39 @@ define void @struct_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__vgpr_rsrc__vgpr ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: struct_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__vgpr_rsrc__vgpr_voffset__vgpr_soffset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v9, v6 -; GFX940-NEXT: v_mov_b32_e32 v8, v5 -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: s_mov_b64 s[2:3], exec -; GFX940-NEXT: .LBB5_1: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: v_readfirstlane_b32 s4, v2 -; GFX940-NEXT: v_readfirstlane_b32 s5, v3 -; GFX940-NEXT: v_readfirstlane_b32 s6, v4 -; GFX940-NEXT: v_readfirstlane_b32 s7, v5 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[2:3] -; GFX940-NEXT: v_readfirstlane_b32 s8, v7 -; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[4:5] -; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, s8, v7 -; GFX940-NEXT: s_and_b64 s[0:1], s[0:1], vcc -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: buffer_atomic_pk_add_f16 v0, v[8:9], s[4:7], s8 idxen offen -; GFX940-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5 -; GFX940-NEXT: ; implicit-def: $vgpr7 -; GFX940-NEXT: ; implicit-def: $vgpr0 -; GFX940-NEXT: ; implicit-def: $vgpr8_vgpr9 -; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB5_1 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: s_mov_b64 exec, s[2:3] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: struct_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__vgpr_rsrc__vgpr_voffset__vgpr_soffset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v9, v6 +; GFX942-NEXT: v_mov_b32_e32 v8, v5 +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: s_mov_b64 s[2:3], exec +; GFX942-NEXT: .LBB5_1: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: v_readfirstlane_b32 s4, v2 +; GFX942-NEXT: v_readfirstlane_b32 s5, v3 +; GFX942-NEXT: v_readfirstlane_b32 s6, v4 +; GFX942-NEXT: v_readfirstlane_b32 s7, v5 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[2:3] +; GFX942-NEXT: v_readfirstlane_b32 s8, v7 +; GFX942-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[4:5] +; GFX942-NEXT: s_and_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, s8, v7 +; GFX942-NEXT: s_and_b64 s[0:1], s[0:1], vcc +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: buffer_atomic_pk_add_f16 v0, v[8:9], s[4:7], s8 idxen offen +; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5 +; GFX942-NEXT: ; implicit-def: $vgpr7 +; GFX942-NEXT: ; implicit-def: $vgpr0 +; GFX942-NEXT: ; implicit-def: $vgpr8_vgpr9 +; GFX942-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB5_1 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: s_mov_b64 exec, s[2:3] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX1200-LABEL: struct_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__vgpr_rsrc__vgpr_voffset__vgpr_soffset: ; GFX1200: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fadd_rtn.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fadd_rtn.ll index 80fd1e05477f1..104462a506c8c 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fadd_rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fadd_rtn.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s +; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX1200 %s define float @struct_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) #0 { @@ -13,14 +13,14 @@ define float @struct_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__vgpr_vo ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: struct_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: buffer_atomic_add_f32 v0, v[2:3], s[0:3], s16 idxen offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: struct_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: buffer_atomic_add_f32 v0, v[2:3], s[0:3], s16 idxen offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX1200-LABEL: struct_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset: ; GFX1200: ; %bb.0: @@ -45,12 +45,12 @@ define float @struct_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__0_voffs ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: struct_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], s16 idxen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: struct_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], s16 idxen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX1200-LABEL: struct_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset: ; GFX1200: ; %bb.0: @@ -76,14 +76,14 @@ define float @struct_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__vgpr_vo ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: struct_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: buffer_atomic_add_f32 v0, v[2:3], s[0:3], s16 idxen offen sc0 nt -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: struct_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: buffer_atomic_add_f32 v0, v[2:3], s[0:3], s16 idxen offen sc0 nt +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX1200-LABEL: struct_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc: ; GFX1200: ; %bb.0: @@ -109,14 +109,14 @@ define <2 x half> @struct_ptr_buffer_atomic_add_v2f16_rtn__vgpr_val__sgpr_rsrc__ ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: struct_ptr_buffer_atomic_add_v2f16_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: buffer_atomic_pk_add_f16 v0, v[2:3], s[0:3], s16 idxen offen sc0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: struct_ptr_buffer_atomic_add_v2f16_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: buffer_atomic_pk_add_f16 v0, v[2:3], s[0:3], s16 idxen offen sc0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX1200-LABEL: struct_ptr_buffer_atomic_add_v2f16_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset: ; GFX1200: ; %bb.0: @@ -168,39 +168,39 @@ define float @struct_ptr_buffer_atomic_add_f32_rtn__vgpr_val__vgpr_rsrc__vgpr_vo ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: struct_ptr_buffer_atomic_add_f32_rtn__vgpr_val__vgpr_rsrc__vgpr_voffset__vgpr_soffset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v9, v6 -; GFX940-NEXT: v_mov_b32_e32 v8, v5 -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: s_mov_b64 s[2:3], exec -; GFX940-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: v_readfirstlane_b32 s4, v2 -; GFX940-NEXT: v_readfirstlane_b32 s5, v3 -; GFX940-NEXT: v_readfirstlane_b32 s6, v4 -; GFX940-NEXT: v_readfirstlane_b32 s7, v5 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[2:3] -; GFX940-NEXT: v_readfirstlane_b32 s8, v7 -; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[4:5] -; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, s8, v7 -; GFX940-NEXT: s_and_b64 s[0:1], s[0:1], vcc -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_atomic_add_f32 v0, v[8:9], s[4:7], s8 idxen offen sc0 -; GFX940-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5 -; GFX940-NEXT: ; implicit-def: $vgpr7 -; GFX940-NEXT: ; implicit-def: $vgpr8_vgpr9 -; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB4_1 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: s_mov_b64 exec, s[2:3] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: struct_ptr_buffer_atomic_add_f32_rtn__vgpr_val__vgpr_rsrc__vgpr_voffset__vgpr_soffset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v9, v6 +; GFX942-NEXT: v_mov_b32_e32 v8, v5 +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: s_mov_b64 s[2:3], exec +; GFX942-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: v_readfirstlane_b32 s4, v2 +; GFX942-NEXT: v_readfirstlane_b32 s5, v3 +; GFX942-NEXT: v_readfirstlane_b32 s6, v4 +; GFX942-NEXT: v_readfirstlane_b32 s7, v5 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[2:3] +; GFX942-NEXT: v_readfirstlane_b32 s8, v7 +; GFX942-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[4:5] +; GFX942-NEXT: s_and_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, s8, v7 +; GFX942-NEXT: s_and_b64 s[0:1], s[0:1], vcc +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_atomic_add_f32 v0, v[8:9], s[4:7], s8 idxen offen sc0 +; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5 +; GFX942-NEXT: ; implicit-def: $vgpr7 +; GFX942-NEXT: ; implicit-def: $vgpr8_vgpr9 +; GFX942-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB4_1 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: s_mov_b64 exec, s[2:3] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX1200-LABEL: struct_ptr_buffer_atomic_add_f32_rtn__vgpr_val__vgpr_rsrc__vgpr_voffset__vgpr_soffset: ; GFX1200: ; %bb.0: @@ -278,39 +278,39 @@ define <2 x half> @struct_ptr_buffer_atomic_add_v2f16_rtn__vgpr_val__vgpr_rsrc__ ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: struct_ptr_buffer_atomic_add_v2f16_rtn__vgpr_val__vgpr_rsrc__vgpr_voffset__vgpr_soffset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v9, v6 -; GFX940-NEXT: v_mov_b32_e32 v8, v5 -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: s_mov_b64 s[2:3], exec -; GFX940-NEXT: .LBB5_1: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: v_readfirstlane_b32 s4, v2 -; GFX940-NEXT: v_readfirstlane_b32 s5, v3 -; GFX940-NEXT: v_readfirstlane_b32 s6, v4 -; GFX940-NEXT: v_readfirstlane_b32 s7, v5 -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[2:3] -; GFX940-NEXT: v_readfirstlane_b32 s8, v7 -; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[4:5] -; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, s8, v7 -; GFX940-NEXT: s_and_b64 s[0:1], s[0:1], vcc -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_atomic_pk_add_f16 v0, v[8:9], s[4:7], s8 idxen offen sc0 -; GFX940-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5 -; GFX940-NEXT: ; implicit-def: $vgpr7 -; GFX940-NEXT: ; implicit-def: $vgpr8_vgpr9 -; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB5_1 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: s_mov_b64 exec, s[2:3] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: struct_ptr_buffer_atomic_add_v2f16_rtn__vgpr_val__vgpr_rsrc__vgpr_voffset__vgpr_soffset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v9, v6 +; GFX942-NEXT: v_mov_b32_e32 v8, v5 +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: s_mov_b64 s[2:3], exec +; GFX942-NEXT: .LBB5_1: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: v_readfirstlane_b32 s4, v2 +; GFX942-NEXT: v_readfirstlane_b32 s5, v3 +; GFX942-NEXT: v_readfirstlane_b32 s6, v4 +; GFX942-NEXT: v_readfirstlane_b32 s7, v5 +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[2:3] +; GFX942-NEXT: v_readfirstlane_b32 s8, v7 +; GFX942-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[4:5] +; GFX942-NEXT: s_and_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, s8, v7 +; GFX942-NEXT: s_and_b64 s[0:1], s[0:1], vcc +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: buffer_atomic_pk_add_f16 v0, v[8:9], s[4:7], s8 idxen offen sc0 +; GFX942-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5 +; GFX942-NEXT: ; implicit-def: $vgpr7 +; GFX942-NEXT: ; implicit-def: $vgpr8_vgpr9 +; GFX942-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB5_1 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: s_mov_b64 exec, s[2:3] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX1200-LABEL: struct_ptr_buffer_atomic_add_v2f16_rtn__vgpr_val__vgpr_rsrc__vgpr_voffset__vgpr_soffset: ; GFX1200: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64.ll index 8254a86a3467a..eb0ac4ecac68c 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64.ll @@ -3,7 +3,7 @@ ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s ; Not supported in gfx8 or gfx9, except 90a/940 ; xUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90A %s -; xUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX940 %s +; xUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX942 %s define double @struct_ptr_buffer_atomic_fmax_f64_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { ; GFX6-LABEL: struct_ptr_buffer_atomic_fmax_f64_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64.ll index 3934a1f9c02aa..92c717c7d68d9 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64.ll @@ -3,7 +3,7 @@ ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s ; Not supported in gfx8 or gfx9, except 90a/940 ; xUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90A %s -; xUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX940 %s +; xUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX942 %s define double @struct_ptr_buffer_atomic_fmin_f64_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { ; GFX6-LABEL: struct_ptr_buffer_atomic_fmin_f64_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.lds.gfx950.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.lds.gfx950.ll index cfe9545b074e3..654e72daffedd 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.lds.gfx950.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.lds.gfx950.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-SDAG %s ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-GISEL %s -; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx940 -filetype=null < %s 2>&1 | FileCheck -check-prefix=ERR-SDAG %s -; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx940 -filetype=null < %s 2>&1 | FileCheck -check-prefix=ERR-GISEL %s +; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx942 -filetype=null < %s 2>&1 | FileCheck -check-prefix=ERR-SDAG %s +; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx942 -filetype=null < %s 2>&1 | FileCheck -check-prefix=ERR-GISEL %s ; ERR-SDAG: LLVM ERROR: Do not know how to expand this operator's operand! ; ERR-GISEL: LLVM ERROR: cannot select: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.ptr.buffer.load.lds), diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll index 2c1472313ca7a..2346cf0ba6303 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll @@ -1,6 +1,6 @@ ; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX9,GFX906 -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX9,GFX940-SDAG -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX9,GFX940-GISEL +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX9,GFX942-SDAG +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX9,GFX942-GISEL ; RUN: llc -mtriple=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10 @@ -43,10 +43,10 @@ entry: ; GCN-LABEL: {{^}}test_llvm_amdgcn_udot2_op_sel: ; GFX906: v_dot2_u32_u16 v{{[0-9]+}}, 1, v{{[0-9]+}}, s{{[0-9]+}} op_sel:[0,1,0] op_sel_hi:[0,0,1]{{$}} -; GFX940-SDAG: s_mov_b32 [[K:s[0-9]+]], 0x10001 -; GFX940-SDAG: v_dot2_u32_u16 v{{[0-9]+}}, [[K]], v{{[0-9]+}}, v{{[0-9]+}}{{$}} -; GFX940-GISEL: v_mov_b32_e32 [[K:v[0-9]+]], 0x10001 -; GFX940-GISEL: v_dot2_u32_u16 v{{[0-9]+}}, [[K]], v{{[0-9]+}}, s{{[0-9]+}}{{$}} +; GFX942-SDAG: s_mov_b32 [[K:s[0-9]+]], 0x10001 +; GFX942-SDAG: v_dot2_u32_u16 v{{[0-9]+}}, [[K]], v{{[0-9]+}}, v{{[0-9]+}}{{$}} +; GFX942-GISEL: v_mov_b32_e32 [[K:v[0-9]+]], 0x10001 +; GFX942-GISEL: v_dot2_u32_u16 v{{[0-9]+}}, [[K]], v{{[0-9]+}}, s{{[0-9]+}}{{$}} ; GFX10: v_dot2_u32_u16 v{{[0-9]+}}, 1, v{{[0-9]+}}, s{{[0-9]+}} op_sel:[0,1,0] op_sel_hi:[0,0,1]{{$}} define amdgpu_kernel void @test_llvm_amdgcn_udot2_op_sel( ptr addrspace(1) %r, diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot4.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot4.ll index 7895c60f3467e..c3de1db5ec8e0 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot4.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot4.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX9 -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX9 +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX9 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot8.ll index c2df2d9882f3b..c976962ffc014 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot8.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot8.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX9 -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX9 +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX9 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10 diff --git a/llvm/test/CodeGen/AMDGPU/local-atomicrmw-fadd.ll b/llvm/test/CodeGen/AMDGPU/local-atomicrmw-fadd.ll index 5fd6deff0fbbb..50b6ad9f0cb37 100644 --- a/llvm/test/CodeGen/AMDGPU/local-atomicrmw-fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/local-atomicrmw-fadd.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s -; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s @@ -28,13 +28,13 @@ define float @local_atomic_fadd_ret_f32(ptr addrspace(3) %ptr) nounwind { ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fadd_ret_f32: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, 4.0 -; GFX940-NEXT: ds_add_rtn_f32 v0, v0, v1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fadd_ret_f32: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, 4.0 +; GFX942-NEXT: ds_add_rtn_f32 v0, v0, v1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fadd_ret_f32: ; GFX11: ; %bb.0: @@ -143,13 +143,13 @@ define float @local_atomic_fadd_ret_f32__offset(ptr addrspace(3) %ptr) nounwind ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fadd_ret_f32__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, 4.0 -; GFX940-NEXT: ds_add_rtn_f32 v0, v0, v1 offset:65532 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fadd_ret_f32__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, 4.0 +; GFX942-NEXT: ds_add_rtn_f32 v0, v0, v1 offset:65532 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fadd_ret_f32__offset: ; GFX11: ; %bb.0: @@ -259,13 +259,13 @@ define void @local_atomic_fadd_noret_f32(ptr addrspace(3) %ptr) nounwind { ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fadd_noret_f32: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, 4.0 -; GFX940-NEXT: ds_add_f32 v0, v1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fadd_noret_f32: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, 4.0 +; GFX942-NEXT: ds_add_f32 v0, v1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fadd_noret_f32: ; GFX11: ; %bb.0: @@ -372,13 +372,13 @@ define void @local_atomic_fadd_noret_f32__offset(ptr addrspace(3) %ptr) nounwind ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fadd_noret_f32__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, 4.0 -; GFX940-NEXT: ds_add_f32 v0, v1 offset:65532 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fadd_noret_f32__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, 4.0 +; GFX942-NEXT: ds_add_f32 v0, v1 offset:65532 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fadd_noret_f32__offset: ; GFX11: ; %bb.0: @@ -507,13 +507,13 @@ define double @local_atomic_fadd_ret_f64(ptr addrspace(3) %ptr) nounwind { ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fadd_ret_f64: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 -; GFX940-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3] -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fadd_ret_f64: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX942-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3] +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fadd_ret_f64: ; GFX11: ; %bb.0: @@ -698,13 +698,13 @@ define double @local_atomic_fadd_ret_f64__offset(ptr addrspace(3) %ptr) nounwind ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fadd_ret_f64__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 -; GFX940-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3] offset:65528 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fadd_ret_f64__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX942-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3] offset:65528 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fadd_ret_f64__offset: ; GFX11: ; %bb.0: @@ -888,13 +888,13 @@ define void @local_atomic_fadd_noret_f64(ptr addrspace(3) %ptr) nounwind { ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fadd_noret_f64: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 -; GFX940-NEXT: ds_add_f64 v0, v[2:3] -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fadd_noret_f64: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX942-NEXT: ds_add_f64 v0, v[2:3] +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fadd_noret_f64: ; GFX11: ; %bb.0: @@ -1070,13 +1070,13 @@ define void @local_atomic_fadd_noret_f64__offset(ptr addrspace(3) %ptr) nounwind ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fadd_noret_f64__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 -; GFX940-NEXT: ds_add_f64 v0, v[2:3] offset:65528 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fadd_noret_f64__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX942-NEXT: ds_add_f64 v0, v[2:3] offset:65528 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fadd_noret_f64__offset: ; GFX11: ; %bb.0: @@ -1272,35 +1272,35 @@ define half @local_atomic_fadd_ret_f16(ptr addrspace(3) %ptr) nounwind { ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fadd_ret_f16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_and_b32_e32 v1, -4, v0 -; GFX940-NEXT: ds_read_b32 v2, v1 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v0 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_and_b32_e32 v0, 24, v3 -; GFX940-NEXT: v_lshlrev_b32_e64 v3, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v3, v3 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB8_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_lshrrev_b32_e32 v2, v0, v4 -; GFX940-NEXT: v_add_f16_e32 v2, 4.0, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, v0, v2 -; GFX940-NEXT: v_and_or_b32 v2, v4, v3, v2 -; GFX940-NEXT: ds_cmpst_rtn_b32 v2, v1, v4, v2 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB8_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v0, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fadd_ret_f16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_and_b32_e32 v1, -4, v0 +; GFX942-NEXT: ds_read_b32 v2, v1 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v0 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_and_b32_e32 v0, 24, v3 +; GFX942-NEXT: v_lshlrev_b32_e64 v3, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v3, v3 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB8_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_lshrrev_b32_e32 v2, v0, v4 +; GFX942-NEXT: v_add_f16_e32 v2, 4.0, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, v0, v2 +; GFX942-NEXT: v_and_or_b32 v2, v4, v3, v2 +; GFX942-NEXT: ds_cmpst_rtn_b32 v2, v1, v4, v2 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB8_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v0, v2 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fadd_ret_f16: ; GFX11: ; %bb.0: @@ -1578,36 +1578,36 @@ define half @local_atomic_fadd_ret_f16__offset(ptr addrspace(3) %ptr) nounwind { ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v1, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fadd_ret_f16__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_u32_e32 v0, 0xfffe, v0 -; GFX940-NEXT: v_and_b32_e32 v1, -4, v0 -; GFX940-NEXT: ds_read_b32 v2, v1 -; GFX940-NEXT: v_and_b32_e32 v0, 3, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v3, v0, s0 -; GFX940-NEXT: v_not_b32_e32 v3, v3 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB9_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_lshrrev_b32_e32 v2, v0, v4 -; GFX940-NEXT: v_add_f16_e32 v2, 4.0, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, v0, v2 -; GFX940-NEXT: v_and_or_b32 v2, v4, v3, v2 -; GFX940-NEXT: ds_cmpst_rtn_b32 v2, v1, v4, v2 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB9_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v0, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fadd_ret_f16__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_u32_e32 v0, 0xfffe, v0 +; GFX942-NEXT: v_and_b32_e32 v1, -4, v0 +; GFX942-NEXT: ds_read_b32 v2, v1 +; GFX942-NEXT: v_and_b32_e32 v0, 3, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v3, v0, s0 +; GFX942-NEXT: v_not_b32_e32 v3, v3 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB9_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_lshrrev_b32_e32 v2, v0, v4 +; GFX942-NEXT: v_add_f16_e32 v2, 4.0, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, v0, v2 +; GFX942-NEXT: v_and_or_b32 v2, v4, v3, v2 +; GFX942-NEXT: ds_cmpst_rtn_b32 v2, v1, v4, v2 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB9_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v0, v2 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fadd_ret_f16__offset: ; GFX11: ; %bb.0: @@ -1891,34 +1891,34 @@ define void @local_atomic_fadd_noret_f16(ptr addrspace(3) %ptr) nounwind { ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fadd_noret_f16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_and_b32_e32 v1, -4, v0 -; GFX940-NEXT: ds_read_b32 v3, v1 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 3, v0 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_and_b32_e32 v0, 24, v2 -; GFX940-NEXT: v_lshlrev_b32_e64 v2, v2, s0 -; GFX940-NEXT: v_not_b32_e32 v2, v2 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB10_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v0, v3 -; GFX940-NEXT: v_add_f16_e32 v4, 4.0, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v0, v4 -; GFX940-NEXT: v_and_or_b32 v4, v3, v2, v4 -; GFX940-NEXT: ds_cmpst_rtn_b32 v4, v1, v3, v4 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB10_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fadd_noret_f16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_and_b32_e32 v1, -4, v0 +; GFX942-NEXT: ds_read_b32 v3, v1 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 3, v0 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_and_b32_e32 v0, 24, v2 +; GFX942-NEXT: v_lshlrev_b32_e64 v2, v2, s0 +; GFX942-NEXT: v_not_b32_e32 v2, v2 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB10_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v0, v3 +; GFX942-NEXT: v_add_f16_e32 v4, 4.0, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v0, v4 +; GFX942-NEXT: v_and_or_b32 v4, v3, v2, v4 +; GFX942-NEXT: ds_cmpst_rtn_b32 v4, v1, v3, v4 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB10_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fadd_noret_f16: ; GFX11: ; %bb.0: @@ -2185,35 +2185,35 @@ define void @local_atomic_fadd_noret_f16__offset(ptr addrspace(3) %ptr) nounwind ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fadd_noret_f16__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_u32_e32 v1, 0xfffe, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v1 -; GFX940-NEXT: ds_read_b32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v1, 3, v1 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 3, v1 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v2, v1, s0 -; GFX940-NEXT: v_not_b32_e32 v2, v2 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB11_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v1, v3 -; GFX940-NEXT: v_add_f16_e32 v4, 4.0, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v1, v4 -; GFX940-NEXT: v_and_or_b32 v4, v3, v2, v4 -; GFX940-NEXT: ds_cmpst_rtn_b32 v4, v0, v3, v4 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB11_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fadd_noret_f16__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_u32_e32 v1, 0xfffe, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v1 +; GFX942-NEXT: ds_read_b32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v1, 3, v1 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 3, v1 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v2, v1, s0 +; GFX942-NEXT: v_not_b32_e32 v2, v2 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB11_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v1, v3 +; GFX942-NEXT: v_add_f16_e32 v4, 4.0, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v1, v4 +; GFX942-NEXT: v_and_or_b32 v4, v3, v2, v4 +; GFX942-NEXT: ds_cmpst_rtn_b32 v4, v0, v3, v4 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB11_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fadd_noret_f16__offset: ; GFX11: ; %bb.0: @@ -2479,28 +2479,28 @@ define half @local_atomic_fadd_ret_f16__offset__align4(ptr addrspace(3) %ptr) no ; GFX12-NEXT: v_mov_b32_e32 v0, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fadd_ret_f16__offset__align4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v1, v0 offset:65534 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_mov_b32 s2, 0xffff0000 -; GFX940-NEXT: .LBB12_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_add_f16_e32 v1, 4.0, v2 -; GFX940-NEXT: v_and_or_b32 v1, v2, s2, v1 -; GFX940-NEXT: ds_cmpst_rtn_b32 v1, v0, v2, v1 offset:65534 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB12_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fadd_ret_f16__offset__align4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v1, v0 offset:65534 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_mov_b32 s2, 0xffff0000 +; GFX942-NEXT: .LBB12_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_add_f16_e32 v1, 4.0, v2 +; GFX942-NEXT: v_and_or_b32 v1, v2, s2, v1 +; GFX942-NEXT: ds_cmpst_rtn_b32 v1, v0, v2, v1 offset:65534 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB12_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fadd_ret_f16__offset__align4: ; GFX11: ; %bb.0: @@ -2714,27 +2714,27 @@ define void @local_atomic_fadd_noret_f16__offset__align4(ptr addrspace(3) %ptr) ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fadd_noret_f16__offset__align4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v1, v0 offset:65534 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_mov_b32 s2, 0xffff0000 -; GFX940-NEXT: .LBB13_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_add_f16_e32 v2, 4.0, v1 -; GFX940-NEXT: v_and_or_b32 v2, v1, s2, v2 -; GFX940-NEXT: ds_cmpst_rtn_b32 v2, v0, v1, v2 offset:65534 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB13_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fadd_noret_f16__offset__align4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v1, v0 offset:65534 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_mov_b32 s2, 0xffff0000 +; GFX942-NEXT: .LBB13_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_add_f16_e32 v2, 4.0, v1 +; GFX942-NEXT: v_and_or_b32 v2, v1, s2, v2 +; GFX942-NEXT: ds_cmpst_rtn_b32 v2, v0, v1, v2 offset:65534 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB13_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fadd_noret_f16__offset__align4: ; GFX11: ; %bb.0: @@ -2964,43 +2964,43 @@ define bfloat @local_atomic_fadd_ret_bf16(ptr addrspace(3) %ptr) nounwind { ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fadd_ret_bf16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_and_b32_e32 v1, -4, v0 -; GFX940-NEXT: ds_read_b32 v3, v1 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 3, v0 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_and_b32_e32 v0, 24, v2 -; GFX940-NEXT: v_lshlrev_b32_e64 v2, v2, s0 -; GFX940-NEXT: v_not_b32_e32 v2, v2 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB14_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_lshrrev_b32_sdwa v3, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_add_f32_e32 v3, 4.0, v3 -; GFX940-NEXT: v_bfe_u32 v5, v3, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v3 -; GFX940-NEXT: v_add3_u32 v5, v5, v3, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v3, v4, v2, v3 -; GFX940-NEXT: ds_cmpst_rtn_b32 v3, v1, v4, v3 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB14_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fadd_ret_bf16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_and_b32_e32 v1, -4, v0 +; GFX942-NEXT: ds_read_b32 v3, v1 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 3, v0 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_and_b32_e32 v0, 24, v2 +; GFX942-NEXT: v_lshlrev_b32_e64 v2, v2, s0 +; GFX942-NEXT: v_not_b32_e32 v2, v2 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB14_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_lshrrev_b32_sdwa v3, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_add_f32_e32 v3, 4.0, v3 +; GFX942-NEXT: v_bfe_u32 v5, v3, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v3 +; GFX942-NEXT: v_add3_u32 v5, v5, v3, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v3, v4, v2, v3 +; GFX942-NEXT: ds_cmpst_rtn_b32 v3, v1, v4, v3 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB14_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fadd_ret_bf16: ; GFX11: ; %bb.0: @@ -3319,44 +3319,44 @@ define bfloat @local_atomic_fadd_ret_bf16__offset(ptr addrspace(3) %ptr) nounwin ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v1, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fadd_ret_bf16__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_u32_e32 v0, 0xfffe, v0 -; GFX940-NEXT: v_and_b32_e32 v1, -4, v0 -; GFX940-NEXT: ds_read_b32 v3, v1 -; GFX940-NEXT: v_and_b32_e32 v0, 3, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v2, v0, s0 -; GFX940-NEXT: v_not_b32_e32 v2, v2 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB15_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_lshrrev_b32_sdwa v3, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_add_f32_e32 v3, 4.0, v3 -; GFX940-NEXT: v_bfe_u32 v5, v3, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v3 -; GFX940-NEXT: v_add3_u32 v5, v5, v3, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v3, v4, v2, v3 -; GFX940-NEXT: ds_cmpst_rtn_b32 v3, v1, v4, v3 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB15_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fadd_ret_bf16__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_u32_e32 v0, 0xfffe, v0 +; GFX942-NEXT: v_and_b32_e32 v1, -4, v0 +; GFX942-NEXT: ds_read_b32 v3, v1 +; GFX942-NEXT: v_and_b32_e32 v0, 3, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v2, v0, s0 +; GFX942-NEXT: v_not_b32_e32 v2, v2 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB15_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_lshrrev_b32_sdwa v3, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_add_f32_e32 v3, 4.0, v3 +; GFX942-NEXT: v_bfe_u32 v5, v3, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v3 +; GFX942-NEXT: v_add3_u32 v5, v5, v3, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v3, v4, v2, v3 +; GFX942-NEXT: ds_cmpst_rtn_b32 v3, v1, v4, v3 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB15_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fadd_ret_bf16__offset: ; GFX11: ; %bb.0: @@ -3681,42 +3681,42 @@ define void @local_atomic_fadd_noret_bf16(ptr addrspace(3) %ptr) nounwind { ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fadd_noret_bf16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_and_b32_e32 v1, -4, v0 -; GFX940-NEXT: ds_read_b32 v3, v1 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 3, v0 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_and_b32_e32 v0, 24, v2 -; GFX940-NEXT: v_lshlrev_b32_e64 v2, v2, s0 -; GFX940-NEXT: v_not_b32_e32 v2, v2 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB16_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v4, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_add_f32_e32 v4, 4.0, v4 -; GFX940-NEXT: v_bfe_u32 v5, v4, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v4 -; GFX940-NEXT: v_add3_u32 v5, v5, v4, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v4, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v4, v3, v2, v4 -; GFX940-NEXT: ds_cmpst_rtn_b32 v4, v1, v3, v4 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB16_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fadd_noret_bf16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_and_b32_e32 v1, -4, v0 +; GFX942-NEXT: ds_read_b32 v3, v1 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 3, v0 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_and_b32_e32 v0, 24, v2 +; GFX942-NEXT: v_lshlrev_b32_e64 v2, v2, s0 +; GFX942-NEXT: v_not_b32_e32 v2, v2 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB16_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v4, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_add_f32_e32 v4, 4.0, v4 +; GFX942-NEXT: v_bfe_u32 v5, v4, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v4 +; GFX942-NEXT: v_add3_u32 v5, v5, v4, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v4, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v4, v3, v2, v4 +; GFX942-NEXT: ds_cmpst_rtn_b32 v4, v1, v3, v4 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB16_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fadd_noret_bf16: ; GFX11: ; %bb.0: @@ -4024,43 +4024,43 @@ define void @local_atomic_fadd_noret_bf16__offset(ptr addrspace(3) %ptr) nounwin ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fadd_noret_bf16__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_u32_e32 v1, 0xfffe, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v1 -; GFX940-NEXT: ds_read_b32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v1, 3, v1 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 3, v1 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v2, v1, s0 -; GFX940-NEXT: v_not_b32_e32 v2, v2 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB17_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_add_f32_e32 v4, 4.0, v4 -; GFX940-NEXT: v_bfe_u32 v5, v4, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v4 -; GFX940-NEXT: v_add3_u32 v5, v5, v4, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v4, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v4, v3, v2, v4 -; GFX940-NEXT: ds_cmpst_rtn_b32 v4, v0, v3, v4 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB17_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fadd_noret_bf16__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_u32_e32 v1, 0xfffe, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v1 +; GFX942-NEXT: ds_read_b32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v1, 3, v1 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 3, v1 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v2, v1, s0 +; GFX942-NEXT: v_not_b32_e32 v2, v2 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB17_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_add_f32_e32 v4, 4.0, v4 +; GFX942-NEXT: v_bfe_u32 v5, v4, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v4 +; GFX942-NEXT: v_add3_u32 v5, v5, v4, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v4, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v4, v3, v2, v4 +; GFX942-NEXT: ds_cmpst_rtn_b32 v4, v0, v3, v4 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB17_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fadd_noret_bf16__offset: ; GFX11: ; %bb.0: @@ -4367,37 +4367,37 @@ define bfloat @local_atomic_fadd_ret_bf16__offset__align4(ptr addrspace(3) %ptr) ; GFX12-NEXT: v_mov_b32_e32 v0, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fadd_ret_bf16__offset__align4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v1, v0 offset:65534 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: s_mov_b32 s3, 0xffff0000 -; GFX940-NEXT: .LBB18_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v2 -; GFX940-NEXT: v_add_f32_e32 v1, 4.0, v1 -; GFX940-NEXT: v_bfe_u32 v3, v1, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v4, 0x400000, v1 -; GFX940-NEXT: v_add3_u32 v3, v3, v1, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v1, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: v_and_or_b32 v1, v2, s3, v1 -; GFX940-NEXT: ds_cmpst_rtn_b32 v1, v0, v2, v1 offset:65534 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB18_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fadd_ret_bf16__offset__align4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v1, v0 offset:65534 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: s_mov_b32 s3, 0xffff0000 +; GFX942-NEXT: .LBB18_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v2 +; GFX942-NEXT: v_add_f32_e32 v1, 4.0, v1 +; GFX942-NEXT: v_bfe_u32 v3, v1, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v4, 0x400000, v1 +; GFX942-NEXT: v_add3_u32 v3, v3, v1, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v1, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: v_and_or_b32 v1, v2, s3, v1 +; GFX942-NEXT: ds_cmpst_rtn_b32 v1, v0, v2, v1 offset:65534 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB18_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fadd_ret_bf16__offset__align4: ; GFX11: ; %bb.0: @@ -4658,36 +4658,36 @@ define void @local_atomic_fadd_noret_bf16__offset__align4(ptr addrspace(3) %ptr) ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fadd_noret_bf16__offset__align4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v1, v0 offset:65534 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: s_mov_b32 s3, 0xffff0000 -; GFX940-NEXT: .LBB19_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 -; GFX940-NEXT: v_add_f32_e32 v2, 4.0, v2 -; GFX940-NEXT: v_bfe_u32 v3, v2, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v4, 0x400000, v2 -; GFX940-NEXT: v_add3_u32 v3, v3, v2, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_and_or_b32 v2, v1, s3, v2 -; GFX940-NEXT: ds_cmpst_rtn_b32 v2, v0, v1, v2 offset:65534 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB19_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fadd_noret_bf16__offset__align4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v1, v0 offset:65534 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: s_mov_b32 s3, 0xffff0000 +; GFX942-NEXT: .LBB19_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1 +; GFX942-NEXT: v_add_f32_e32 v2, 4.0, v2 +; GFX942-NEXT: v_bfe_u32 v3, v2, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v4, 0x400000, v2 +; GFX942-NEXT: v_add3_u32 v3, v3, v2, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_and_or_b32 v2, v1, s3, v2 +; GFX942-NEXT: ds_cmpst_rtn_b32 v2, v0, v1, v2 offset:65534 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB19_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fadd_noret_bf16__offset__align4: ; GFX11: ; %bb.0: @@ -4917,12 +4917,12 @@ define <2 x half> @local_atomic_fadd_ret_v2f16(ptr addrspace(3) %ptr, <2 x half> ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fadd_ret_v2f16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_pk_add_rtn_f16 v0, v0, v1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fadd_ret_v2f16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_pk_add_rtn_f16 v0, v0, v1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fadd_ret_v2f16: ; GFX11: ; %bb.0: @@ -5142,12 +5142,12 @@ define <2 x half> @local_atomic_fadd_ret_v2f16__offset(ptr addrspace(3) %ptr, <2 ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fadd_ret_v2f16__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_pk_add_rtn_f16 v0, v0, v1 offset:65532 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fadd_ret_v2f16__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_pk_add_rtn_f16 v0, v0, v1 offset:65532 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fadd_ret_v2f16__offset: ; GFX11: ; %bb.0: @@ -5367,12 +5367,12 @@ define void @local_atomic_fadd_noret_v2f16(ptr addrspace(3) %ptr, <2 x half> %va ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fadd_noret_v2f16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_pk_add_f16 v0, v1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fadd_noret_v2f16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_pk_add_f16 v0, v1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fadd_noret_v2f16: ; GFX11: ; %bb.0: @@ -5583,12 +5583,12 @@ define void @local_atomic_fadd_noret_v2f16__offset(ptr addrspace(3) %ptr, <2 x h ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fadd_noret_v2f16__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_pk_add_f16 v0, v1 offset:65532 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fadd_noret_v2f16__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_pk_add_f16 v0, v1 offset:65532 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fadd_noret_v2f16__offset: ; GFX11: ; %bb.0: @@ -5805,12 +5805,12 @@ define <2 x bfloat> @local_atomic_fadd_ret_v2bf16(ptr addrspace(3) %ptr, <2 x bf ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fadd_ret_v2bf16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_pk_add_rtn_bf16 v0, v0, v1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fadd_ret_v2bf16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_pk_add_rtn_bf16 v0, v0, v1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fadd_ret_v2bf16: ; GFX11: ; %bb.0: @@ -6112,12 +6112,12 @@ define <2 x bfloat> @local_atomic_fadd_ret_v2bf16__offset(ptr addrspace(3) %ptr, ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fadd_ret_v2bf16__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_pk_add_rtn_bf16 v0, v0, v1 offset:65532 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fadd_ret_v2bf16__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_pk_add_rtn_bf16 v0, v0, v1 offset:65532 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fadd_ret_v2bf16__offset: ; GFX11: ; %bb.0: @@ -6420,12 +6420,12 @@ define void @local_atomic_fadd_noret_v2bf16(ptr addrspace(3) %ptr, <2 x bfloat> ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fadd_noret_v2bf16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_pk_add_bf16 v0, v1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fadd_noret_v2bf16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_pk_add_bf16 v0, v1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fadd_noret_v2bf16: ; GFX11: ; %bb.0: @@ -6717,12 +6717,12 @@ define void @local_atomic_fadd_noret_v2bf16__ofset(ptr addrspace(3) %ptr, <2 x b ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fadd_noret_v2bf16__ofset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_pk_add_bf16 v0, v1 offset:65532 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fadd_noret_v2bf16__ofset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_pk_add_bf16 v0, v1 offset:65532 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fadd_noret_v2bf16__ofset: ; GFX11: ; %bb.0: @@ -7103,89 +7103,89 @@ define amdgpu_kernel void @local_ds_fadd(ptr addrspace(1) %out, ptr addrspace(3) ; GFX12-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX12-NEXT: s_endpgm ; -; GFX940-LABEL: local_ds_fadd: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NEXT: s_mov_b64 s[0:1], exec -; GFX940-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 -; GFX940-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_add_i32 s3, s3, 4 -; GFX940-NEXT: ; implicit-def: $vgpr1 -; GFX940-NEXT: s_and_saveexec_b64 s[6:7], vcc -; GFX940-NEXT: s_cbranch_execz .LBB28_2 -; GFX940-NEXT: ; %bb.1: -; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1] -; GFX940-NEXT: s_lshl_b32 s8, s3, 3 -; GFX940-NEXT: v_cvt_f32_ubyte0_e32 v1, s0 -; GFX940-NEXT: v_mul_f32_e32 v1, 0x42280000, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, s8 -; GFX940-NEXT: ds_add_rtn_f32 v1, v2, v1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: .LBB28_2: -; GFX940-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX940-NEXT: s_mov_b64 s[8:9], exec -; GFX940-NEXT: v_readfirstlane_b32 s10, v1 -; GFX940-NEXT: v_mbcnt_lo_u32_b32 v1, s8, 0 -; GFX940-NEXT: v_mbcnt_hi_u32_b32 v1, s9, v1 -; GFX940-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v1 -; GFX940-NEXT: s_and_saveexec_b64 s[6:7], s[0:1] -; GFX940-NEXT: s_cbranch_execz .LBB28_4 -; GFX940-NEXT: ; %bb.3: -; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[8:9] -; GFX940-NEXT: v_cvt_f32_ubyte0_e32 v1, s0 -; GFX940-NEXT: s_lshl_b32 s0, s3, 4 -; GFX940-NEXT: v_mul_f32_e32 v1, 0x42280000, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NEXT: ds_add_f32 v2, v1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: .LBB28_4: -; GFX940-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX940-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 -; GFX940-NEXT: v_mul_f32_e32 v0, 0x42280000, v0 -; GFX940-NEXT: v_add_f32_e32 v0, s10, v0 -; GFX940-NEXT: v_mov_b32_e32 v1, s10 -; GFX940-NEXT: s_mov_b64 s[0:1], exec -; GFX940-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc -; GFX940-NEXT: v_bfrev_b32_e32 v1, 1 -; GFX940-NEXT: ; implicit-def: $vgpr0 -; GFX940-NEXT: .LBB28_5: ; %ComputeLoop -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_ff1_i32_b64 s3, s[0:1] -; GFX940-NEXT: s_lshl_b64 s[6:7], 1, s3 -; GFX940-NEXT: v_readfirstlane_b32 s8, v1 -; GFX940-NEXT: v_readlane_b32 s9, v2, s3 -; GFX940-NEXT: s_mov_b32 m0, s3 -; GFX940-NEXT: s_andn2_b64 s[0:1], s[0:1], s[6:7] -; GFX940-NEXT: v_writelane_b32 v0, s8, m0 -; GFX940-NEXT: s_cmp_lg_u64 s[0:1], 0 -; GFX940-NEXT: v_add_f32_e32 v1, s9, v1 -; GFX940-NEXT: s_cbranch_scc1 .LBB28_5 -; GFX940-NEXT: ; %bb.6: ; %ComputeEnd -; GFX940-NEXT: v_mbcnt_lo_u32_b32 v2, exec_lo, 0 -; GFX940-NEXT: v_mbcnt_hi_u32_b32 v2, exec_hi, v2 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 -; GFX940-NEXT: ; implicit-def: $vgpr2 -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX940-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX940-NEXT: s_cbranch_execz .LBB28_8 -; GFX940-NEXT: ; %bb.7: -; GFX940-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-NEXT: ds_add_rtn_f32 v2, v2, v1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: .LBB28_8: -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NEXT: v_readfirstlane_b32 s2, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_add_f32_e32 v0, s2, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: global_store_dword v1, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: local_ds_fadd: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NEXT: s_mov_b64 s[0:1], exec +; GFX942-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 +; GFX942-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_add_i32 s3, s3, 4 +; GFX942-NEXT: ; implicit-def: $vgpr1 +; GFX942-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GFX942-NEXT: s_cbranch_execz .LBB28_2 +; GFX942-NEXT: ; %bb.1: +; GFX942-NEXT: s_bcnt1_i32_b64 s0, s[0:1] +; GFX942-NEXT: s_lshl_b32 s8, s3, 3 +; GFX942-NEXT: v_cvt_f32_ubyte0_e32 v1, s0 +; GFX942-NEXT: v_mul_f32_e32 v1, 0x42280000, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, s8 +; GFX942-NEXT: ds_add_rtn_f32 v1, v2, v1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: .LBB28_2: +; GFX942-NEXT: s_or_b64 exec, exec, s[6:7] +; GFX942-NEXT: s_mov_b64 s[8:9], exec +; GFX942-NEXT: v_readfirstlane_b32 s10, v1 +; GFX942-NEXT: v_mbcnt_lo_u32_b32 v1, s8, 0 +; GFX942-NEXT: v_mbcnt_hi_u32_b32 v1, s9, v1 +; GFX942-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v1 +; GFX942-NEXT: s_and_saveexec_b64 s[6:7], s[0:1] +; GFX942-NEXT: s_cbranch_execz .LBB28_4 +; GFX942-NEXT: ; %bb.3: +; GFX942-NEXT: s_bcnt1_i32_b64 s0, s[8:9] +; GFX942-NEXT: v_cvt_f32_ubyte0_e32 v1, s0 +; GFX942-NEXT: s_lshl_b32 s0, s3, 4 +; GFX942-NEXT: v_mul_f32_e32 v1, 0x42280000, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NEXT: ds_add_f32 v2, v1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: .LBB28_4: +; GFX942-NEXT: s_or_b64 exec, exec, s[6:7] +; GFX942-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 +; GFX942-NEXT: v_mul_f32_e32 v0, 0x42280000, v0 +; GFX942-NEXT: v_add_f32_e32 v0, s10, v0 +; GFX942-NEXT: v_mov_b32_e32 v1, s10 +; GFX942-NEXT: s_mov_b64 s[0:1], exec +; GFX942-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc +; GFX942-NEXT: v_bfrev_b32_e32 v1, 1 +; GFX942-NEXT: ; implicit-def: $vgpr0 +; GFX942-NEXT: .LBB28_5: ; %ComputeLoop +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_ff1_i32_b64 s3, s[0:1] +; GFX942-NEXT: s_lshl_b64 s[6:7], 1, s3 +; GFX942-NEXT: v_readfirstlane_b32 s8, v1 +; GFX942-NEXT: v_readlane_b32 s9, v2, s3 +; GFX942-NEXT: s_mov_b32 m0, s3 +; GFX942-NEXT: s_andn2_b64 s[0:1], s[0:1], s[6:7] +; GFX942-NEXT: v_writelane_b32 v0, s8, m0 +; GFX942-NEXT: s_cmp_lg_u64 s[0:1], 0 +; GFX942-NEXT: v_add_f32_e32 v1, s9, v1 +; GFX942-NEXT: s_cbranch_scc1 .LBB28_5 +; GFX942-NEXT: ; %bb.6: ; %ComputeEnd +; GFX942-NEXT: v_mbcnt_lo_u32_b32 v2, exec_lo, 0 +; GFX942-NEXT: v_mbcnt_hi_u32_b32 v2, exec_hi, v2 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 +; GFX942-NEXT: ; implicit-def: $vgpr2 +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX942-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX942-NEXT: s_cbranch_execz .LBB28_8 +; GFX942-NEXT: ; %bb.7: +; GFX942-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-NEXT: ds_add_rtn_f32 v2, v2, v1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: .LBB28_8: +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NEXT: v_readfirstlane_b32 s2, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_add_f32_e32 v0, s2, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: global_store_dword v1, v0, s[0:1] +; GFX942-NEXT: s_endpgm ; ; GFX11-LABEL: local_ds_fadd: ; GFX11: ; %bb.0: @@ -7966,86 +7966,86 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs ; GFX12-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX12-NEXT: s_endpgm ; -; GFX940-LABEL: local_ds_fadd_one_as: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NEXT: s_mov_b64 s[0:1], exec -; GFX940-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 -; GFX940-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_add_i32 s3, s3, 4 -; GFX940-NEXT: ; implicit-def: $vgpr1 -; GFX940-NEXT: s_and_saveexec_b64 s[6:7], vcc -; GFX940-NEXT: s_cbranch_execz .LBB29_2 -; GFX940-NEXT: ; %bb.1: -; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1] -; GFX940-NEXT: s_lshl_b32 s8, s3, 3 -; GFX940-NEXT: v_cvt_f32_ubyte0_e32 v1, s0 -; GFX940-NEXT: v_mul_f32_e32 v1, 0x42280000, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, s8 -; GFX940-NEXT: ds_add_rtn_f32 v1, v2, v1 -; GFX940-NEXT: .LBB29_2: -; GFX940-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX940-NEXT: s_mov_b64 s[8:9], exec -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_readfirstlane_b32 s10, v1 -; GFX940-NEXT: v_mbcnt_lo_u32_b32 v1, s8, 0 -; GFX940-NEXT: v_mbcnt_hi_u32_b32 v1, s9, v1 -; GFX940-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v1 -; GFX940-NEXT: s_and_saveexec_b64 s[6:7], s[0:1] -; GFX940-NEXT: s_cbranch_execz .LBB29_4 -; GFX940-NEXT: ; %bb.3: -; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[8:9] -; GFX940-NEXT: v_cvt_f32_ubyte0_e32 v1, s0 -; GFX940-NEXT: s_lshl_b32 s0, s3, 4 -; GFX940-NEXT: v_mul_f32_e32 v1, 0x42280000, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NEXT: ds_add_f32 v2, v1 -; GFX940-NEXT: .LBB29_4: -; GFX940-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX940-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 -; GFX940-NEXT: v_mul_f32_e32 v0, 0x42280000, v0 -; GFX940-NEXT: v_add_f32_e32 v0, s10, v0 -; GFX940-NEXT: v_mov_b32_e32 v1, s10 -; GFX940-NEXT: s_mov_b64 s[0:1], exec -; GFX940-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc -; GFX940-NEXT: v_bfrev_b32_e32 v1, 1 -; GFX940-NEXT: ; implicit-def: $vgpr0 -; GFX940-NEXT: .LBB29_5: ; %ComputeLoop -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_ff1_i32_b64 s3, s[0:1] -; GFX940-NEXT: s_lshl_b64 s[6:7], 1, s3 -; GFX940-NEXT: v_readfirstlane_b32 s8, v1 -; GFX940-NEXT: v_readlane_b32 s9, v2, s3 -; GFX940-NEXT: s_mov_b32 m0, s3 -; GFX940-NEXT: s_andn2_b64 s[0:1], s[0:1], s[6:7] -; GFX940-NEXT: v_writelane_b32 v0, s8, m0 -; GFX940-NEXT: s_cmp_lg_u64 s[0:1], 0 -; GFX940-NEXT: v_add_f32_e32 v1, s9, v1 -; GFX940-NEXT: s_cbranch_scc1 .LBB29_5 -; GFX940-NEXT: ; %bb.6: ; %ComputeEnd -; GFX940-NEXT: v_mbcnt_lo_u32_b32 v2, exec_lo, 0 -; GFX940-NEXT: v_mbcnt_hi_u32_b32 v2, exec_hi, v2 -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 -; GFX940-NEXT: ; implicit-def: $vgpr2 -; GFX940-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX940-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX940-NEXT: ; %bb.7: -; GFX940-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-NEXT: ds_add_rtn_f32 v2, v2, v1 -; GFX940-NEXT: ; %bb.8: -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_readfirstlane_b32 s2, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_add_f32_e32 v0, s2, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GFX940-NEXT: global_store_dword v1, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: local_ds_fadd_one_as: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NEXT: s_mov_b64 s[0:1], exec +; GFX942-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 +; GFX942-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_add_i32 s3, s3, 4 +; GFX942-NEXT: ; implicit-def: $vgpr1 +; GFX942-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GFX942-NEXT: s_cbranch_execz .LBB29_2 +; GFX942-NEXT: ; %bb.1: +; GFX942-NEXT: s_bcnt1_i32_b64 s0, s[0:1] +; GFX942-NEXT: s_lshl_b32 s8, s3, 3 +; GFX942-NEXT: v_cvt_f32_ubyte0_e32 v1, s0 +; GFX942-NEXT: v_mul_f32_e32 v1, 0x42280000, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, s8 +; GFX942-NEXT: ds_add_rtn_f32 v1, v2, v1 +; GFX942-NEXT: .LBB29_2: +; GFX942-NEXT: s_or_b64 exec, exec, s[6:7] +; GFX942-NEXT: s_mov_b64 s[8:9], exec +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_readfirstlane_b32 s10, v1 +; GFX942-NEXT: v_mbcnt_lo_u32_b32 v1, s8, 0 +; GFX942-NEXT: v_mbcnt_hi_u32_b32 v1, s9, v1 +; GFX942-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v1 +; GFX942-NEXT: s_and_saveexec_b64 s[6:7], s[0:1] +; GFX942-NEXT: s_cbranch_execz .LBB29_4 +; GFX942-NEXT: ; %bb.3: +; GFX942-NEXT: s_bcnt1_i32_b64 s0, s[8:9] +; GFX942-NEXT: v_cvt_f32_ubyte0_e32 v1, s0 +; GFX942-NEXT: s_lshl_b32 s0, s3, 4 +; GFX942-NEXT: v_mul_f32_e32 v1, 0x42280000, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NEXT: ds_add_f32 v2, v1 +; GFX942-NEXT: .LBB29_4: +; GFX942-NEXT: s_or_b64 exec, exec, s[6:7] +; GFX942-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 +; GFX942-NEXT: v_mul_f32_e32 v0, 0x42280000, v0 +; GFX942-NEXT: v_add_f32_e32 v0, s10, v0 +; GFX942-NEXT: v_mov_b32_e32 v1, s10 +; GFX942-NEXT: s_mov_b64 s[0:1], exec +; GFX942-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc +; GFX942-NEXT: v_bfrev_b32_e32 v1, 1 +; GFX942-NEXT: ; implicit-def: $vgpr0 +; GFX942-NEXT: .LBB29_5: ; %ComputeLoop +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_ff1_i32_b64 s3, s[0:1] +; GFX942-NEXT: s_lshl_b64 s[6:7], 1, s3 +; GFX942-NEXT: v_readfirstlane_b32 s8, v1 +; GFX942-NEXT: v_readlane_b32 s9, v2, s3 +; GFX942-NEXT: s_mov_b32 m0, s3 +; GFX942-NEXT: s_andn2_b64 s[0:1], s[0:1], s[6:7] +; GFX942-NEXT: v_writelane_b32 v0, s8, m0 +; GFX942-NEXT: s_cmp_lg_u64 s[0:1], 0 +; GFX942-NEXT: v_add_f32_e32 v1, s9, v1 +; GFX942-NEXT: s_cbranch_scc1 .LBB29_5 +; GFX942-NEXT: ; %bb.6: ; %ComputeEnd +; GFX942-NEXT: v_mbcnt_lo_u32_b32 v2, exec_lo, 0 +; GFX942-NEXT: v_mbcnt_hi_u32_b32 v2, exec_hi, v2 +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 +; GFX942-NEXT: ; implicit-def: $vgpr2 +; GFX942-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX942-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX942-NEXT: ; %bb.7: +; GFX942-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-NEXT: ds_add_rtn_f32 v2, v2, v1 +; GFX942-NEXT: ; %bb.8: +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_readfirstlane_b32 s2, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_add_f32_e32 v0, s2, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX942-NEXT: global_store_dword v1, v0, s[0:1] +; GFX942-NEXT: s_endpgm ; ; GFX11-LABEL: local_ds_fadd_one_as: ; GFX11: ; %bb.0: @@ -8725,13 +8725,13 @@ define float @local_atomic_fadd_ret_f32__amdgpu_ignore_denormal_mode(ptr addrspa ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fadd_ret_f32__amdgpu_ignore_denormal_mode: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, 4.0 -; GFX940-NEXT: ds_add_rtn_f32 v0, v0, v1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fadd_ret_f32__amdgpu_ignore_denormal_mode: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, 4.0 +; GFX942-NEXT: ds_add_rtn_f32 v0, v0, v1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fadd_ret_f32__amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: @@ -8840,13 +8840,13 @@ define void @local_atomic_fadd_noret_f32__amdgpu_ignore_denormal_mode(ptr addrsp ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fadd_noret_f32__amdgpu_ignore_denormal_mode: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, 4.0 -; GFX940-NEXT: ds_add_f32 v0, v1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fadd_noret_f32__amdgpu_ignore_denormal_mode: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, 4.0 +; GFX942-NEXT: ds_add_f32 v0, v1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fadd_noret_f32__amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/local-atomicrmw-fmax.ll b/llvm/test/CodeGen/AMDGPU/local-atomicrmw-fmax.ll index 1e8072460c7a3..681c07db327dc 100644 --- a/llvm/test/CodeGen/AMDGPU/local-atomicrmw-fmax.ll +++ b/llvm/test/CodeGen/AMDGPU/local-atomicrmw-fmax.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s -; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s @@ -28,13 +28,13 @@ define float @local_atomic_fmax_ret_f32(ptr addrspace(3) %ptr) nounwind { ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmax_ret_f32: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, 4.0 -; GFX940-NEXT: ds_max_rtn_f32 v0, v0, v1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmax_ret_f32: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, 4.0 +; GFX942-NEXT: ds_max_rtn_f32 v0, v0, v1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmax_ret_f32: ; GFX11: ; %bb.0: @@ -117,13 +117,13 @@ define float @local_atomic_fmax_ret_f32__offset(ptr addrspace(3) %ptr) nounwind ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmax_ret_f32__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, 4.0 -; GFX940-NEXT: ds_max_rtn_f32 v0, v0, v1 offset:65532 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmax_ret_f32__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, 4.0 +; GFX942-NEXT: ds_max_rtn_f32 v0, v0, v1 offset:65532 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmax_ret_f32__offset: ; GFX11: ; %bb.0: @@ -208,13 +208,13 @@ define void @local_atomic_fmax_noret_f32(ptr addrspace(3) %ptr) nounwind { ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmax_noret_f32: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, 4.0 -; GFX940-NEXT: ds_max_f32 v0, v1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmax_noret_f32: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, 4.0 +; GFX942-NEXT: ds_max_f32 v0, v1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmax_noret_f32: ; GFX11: ; %bb.0: @@ -297,13 +297,13 @@ define void @local_atomic_fmax_noret_f32__offset(ptr addrspace(3) %ptr) nounwind ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmax_noret_f32__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, 4.0 -; GFX940-NEXT: ds_max_f32 v0, v1 offset:65532 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmax_noret_f32__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, 4.0 +; GFX942-NEXT: ds_max_f32 v0, v1 offset:65532 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmax_noret_f32__offset: ; GFX11: ; %bb.0: @@ -393,13 +393,13 @@ define double @local_atomic_fmax_ret_f64(ptr addrspace(3) %ptr) nounwind { ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmax_ret_f64: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 -; GFX940-NEXT: ds_max_rtn_f64 v[0:1], v0, v[2:3] -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmax_ret_f64: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX942-NEXT: ds_max_rtn_f64 v[0:1], v0, v[2:3] +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmax_ret_f64: ; GFX11: ; %bb.0: @@ -490,13 +490,13 @@ define double @local_atomic_fmax_ret_f64__offset(ptr addrspace(3) %ptr) nounwind ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmax_ret_f64__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 -; GFX940-NEXT: ds_max_rtn_f64 v[0:1], v0, v[2:3] offset:65528 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmax_ret_f64__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX942-NEXT: ds_max_rtn_f64 v[0:1], v0, v[2:3] offset:65528 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmax_ret_f64__offset: ; GFX11: ; %bb.0: @@ -589,13 +589,13 @@ define void @local_atomic_fmax_noret_f64(ptr addrspace(3) %ptr) nounwind { ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmax_noret_f64: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 -; GFX940-NEXT: ds_max_f64 v0, v[2:3] -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmax_noret_f64: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX942-NEXT: ds_max_f64 v0, v[2:3] +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmax_noret_f64: ; GFX11: ; %bb.0: @@ -686,13 +686,13 @@ define void @local_atomic_fmax_noret_f64__offset(ptr addrspace(3) %ptr) nounwind ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmax_noret_f64__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 -; GFX940-NEXT: ds_max_f64 v0, v[2:3] offset:65528 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmax_noret_f64__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX942-NEXT: ds_max_f64 v0, v[2:3] offset:65528 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmax_noret_f64__offset: ; GFX11: ; %bb.0: @@ -817,36 +817,36 @@ define half @local_atomic_fmax_ret_f16(ptr addrspace(3) %ptr) nounwind { ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmax_ret_f16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_and_b32_e32 v1, -4, v0 -; GFX940-NEXT: ds_read_b32 v3, v1 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 3, v0 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_and_b32_e32 v0, 24, v2 -; GFX940-NEXT: v_lshlrev_b32_e64 v2, v2, s0 -; GFX940-NEXT: v_not_b32_e32 v2, v2 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB8_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_lshrrev_b32_e32 v3, v0, v4 -; GFX940-NEXT: v_max_f16_e32 v3, v3, v3 -; GFX940-NEXT: v_max_f16_e32 v3, 4.0, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, v0, v3 -; GFX940-NEXT: v_and_or_b32 v3, v4, v2, v3 -; GFX940-NEXT: ds_cmpst_rtn_b32 v3, v1, v4, v3 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB8_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmax_ret_f16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_and_b32_e32 v1, -4, v0 +; GFX942-NEXT: ds_read_b32 v3, v1 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 3, v0 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_and_b32_e32 v0, 24, v2 +; GFX942-NEXT: v_lshlrev_b32_e64 v2, v2, s0 +; GFX942-NEXT: v_not_b32_e32 v2, v2 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB8_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_lshrrev_b32_e32 v3, v0, v4 +; GFX942-NEXT: v_max_f16_e32 v3, v3, v3 +; GFX942-NEXT: v_max_f16_e32 v3, 4.0, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, v0, v3 +; GFX942-NEXT: v_and_or_b32 v3, v4, v2, v3 +; GFX942-NEXT: ds_cmpst_rtn_b32 v3, v1, v4, v3 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB8_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmax_ret_f16: ; GFX11: ; %bb.0: @@ -1130,37 +1130,37 @@ define half @local_atomic_fmax_ret_f16__offset(ptr addrspace(3) %ptr) nounwind { ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v1, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmax_ret_f16__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_u32_e32 v0, 0xfffe, v0 -; GFX940-NEXT: v_and_b32_e32 v1, -4, v0 -; GFX940-NEXT: ds_read_b32 v3, v1 -; GFX940-NEXT: v_and_b32_e32 v0, 3, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v2, v0, s0 -; GFX940-NEXT: v_not_b32_e32 v2, v2 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB9_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_lshrrev_b32_e32 v3, v0, v4 -; GFX940-NEXT: v_max_f16_e32 v3, v3, v3 -; GFX940-NEXT: v_max_f16_e32 v3, 4.0, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, v0, v3 -; GFX940-NEXT: v_and_or_b32 v3, v4, v2, v3 -; GFX940-NEXT: ds_cmpst_rtn_b32 v3, v1, v4, v3 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB9_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmax_ret_f16__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_u32_e32 v0, 0xfffe, v0 +; GFX942-NEXT: v_and_b32_e32 v1, -4, v0 +; GFX942-NEXT: ds_read_b32 v3, v1 +; GFX942-NEXT: v_and_b32_e32 v0, 3, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v2, v0, s0 +; GFX942-NEXT: v_not_b32_e32 v2, v2 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB9_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_lshrrev_b32_e32 v3, v0, v4 +; GFX942-NEXT: v_max_f16_e32 v3, v3, v3 +; GFX942-NEXT: v_max_f16_e32 v3, 4.0, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, v0, v3 +; GFX942-NEXT: v_and_or_b32 v3, v4, v2, v3 +; GFX942-NEXT: ds_cmpst_rtn_b32 v3, v1, v4, v3 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB9_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmax_ret_f16__offset: ; GFX11: ; %bb.0: @@ -1450,35 +1450,35 @@ define void @local_atomic_fmax_noret_f16(ptr addrspace(3) %ptr) nounwind { ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmax_noret_f16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_and_b32_e32 v1, -4, v0 -; GFX940-NEXT: ds_read_b32 v3, v1 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 3, v0 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_and_b32_e32 v0, 24, v2 -; GFX940-NEXT: v_lshlrev_b32_e64 v2, v2, s0 -; GFX940-NEXT: v_not_b32_e32 v2, v2 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB10_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v0, v3 -; GFX940-NEXT: v_max_f16_e32 v4, v4, v4 -; GFX940-NEXT: v_max_f16_e32 v4, 4.0, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v0, v4 -; GFX940-NEXT: v_and_or_b32 v4, v3, v2, v4 -; GFX940-NEXT: ds_cmpst_rtn_b32 v4, v1, v3, v4 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB10_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmax_noret_f16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_and_b32_e32 v1, -4, v0 +; GFX942-NEXT: ds_read_b32 v3, v1 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 3, v0 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_and_b32_e32 v0, 24, v2 +; GFX942-NEXT: v_lshlrev_b32_e64 v2, v2, s0 +; GFX942-NEXT: v_not_b32_e32 v2, v2 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB10_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v0, v3 +; GFX942-NEXT: v_max_f16_e32 v4, v4, v4 +; GFX942-NEXT: v_max_f16_e32 v4, 4.0, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v0, v4 +; GFX942-NEXT: v_and_or_b32 v4, v3, v2, v4 +; GFX942-NEXT: ds_cmpst_rtn_b32 v4, v1, v3, v4 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB10_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmax_noret_f16: ; GFX11: ; %bb.0: @@ -1752,36 +1752,36 @@ define void @local_atomic_fmax_noret_f16__offset(ptr addrspace(3) %ptr) nounwind ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmax_noret_f16__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_u32_e32 v1, 0xfffe, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v1 -; GFX940-NEXT: ds_read_b32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v1, 3, v1 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 3, v1 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v2, v1, s0 -; GFX940-NEXT: v_not_b32_e32 v2, v2 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB11_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v1, v3 -; GFX940-NEXT: v_max_f16_e32 v4, v4, v4 -; GFX940-NEXT: v_max_f16_e32 v4, 4.0, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v1, v4 -; GFX940-NEXT: v_and_or_b32 v4, v3, v2, v4 -; GFX940-NEXT: ds_cmpst_rtn_b32 v4, v0, v3, v4 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB11_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmax_noret_f16__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_u32_e32 v1, 0xfffe, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v1 +; GFX942-NEXT: ds_read_b32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v1, 3, v1 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 3, v1 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v2, v1, s0 +; GFX942-NEXT: v_not_b32_e32 v2, v2 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB11_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v1, v3 +; GFX942-NEXT: v_max_f16_e32 v4, v4, v4 +; GFX942-NEXT: v_max_f16_e32 v4, 4.0, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v1, v4 +; GFX942-NEXT: v_and_or_b32 v4, v3, v2, v4 +; GFX942-NEXT: ds_cmpst_rtn_b32 v4, v0, v3, v4 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB11_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmax_noret_f16__offset: ; GFX11: ; %bb.0: @@ -2054,29 +2054,29 @@ define half @local_atomic_fmax_ret_f16__offset__align4(ptr addrspace(3) %ptr) no ; GFX12-NEXT: v_mov_b32_e32 v0, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmax_ret_f16__offset__align4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v1, v0 offset:65534 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_mov_b32 s2, 0xffff0000 -; GFX940-NEXT: .LBB12_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_max_f16_e32 v1, v2, v2 -; GFX940-NEXT: v_max_f16_e32 v1, 4.0, v1 -; GFX940-NEXT: v_and_or_b32 v1, v2, s2, v1 -; GFX940-NEXT: ds_cmpst_rtn_b32 v1, v0, v2, v1 offset:65534 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB12_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmax_ret_f16__offset__align4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v1, v0 offset:65534 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_mov_b32 s2, 0xffff0000 +; GFX942-NEXT: .LBB12_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_max_f16_e32 v1, v2, v2 +; GFX942-NEXT: v_max_f16_e32 v1, 4.0, v1 +; GFX942-NEXT: v_and_or_b32 v1, v2, s2, v1 +; GFX942-NEXT: ds_cmpst_rtn_b32 v1, v0, v2, v1 offset:65534 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB12_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmax_ret_f16__offset__align4: ; GFX11: ; %bb.0: @@ -2297,28 +2297,28 @@ define void @local_atomic_fmax_noret_f16__offset__align4(ptr addrspace(3) %ptr) ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmax_noret_f16__offset__align4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v1, v0 offset:65534 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_mov_b32 s2, 0xffff0000 -; GFX940-NEXT: .LBB13_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_max_f16_e32 v2, v1, v1 -; GFX940-NEXT: v_max_f16_e32 v2, 4.0, v2 -; GFX940-NEXT: v_and_or_b32 v2, v1, s2, v2 -; GFX940-NEXT: ds_cmpst_rtn_b32 v2, v0, v1, v2 offset:65534 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB13_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmax_noret_f16__offset__align4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v1, v0 offset:65534 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_mov_b32 s2, 0xffff0000 +; GFX942-NEXT: .LBB13_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_max_f16_e32 v2, v1, v1 +; GFX942-NEXT: v_max_f16_e32 v2, 4.0, v2 +; GFX942-NEXT: v_and_or_b32 v2, v1, s2, v2 +; GFX942-NEXT: ds_cmpst_rtn_b32 v2, v0, v1, v2 offset:65534 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB13_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmax_noret_f16__offset__align4: ; GFX11: ; %bb.0: @@ -2554,43 +2554,43 @@ define bfloat @local_atomic_fmax_ret_bf16(ptr addrspace(3) %ptr) nounwind { ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmax_ret_bf16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_and_b32_e32 v1, -4, v0 -; GFX940-NEXT: ds_read_b32 v3, v1 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 3, v0 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_and_b32_e32 v0, 24, v2 -; GFX940-NEXT: v_lshlrev_b32_e64 v2, v2, s0 -; GFX940-NEXT: v_not_b32_e32 v2, v2 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB14_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_lshrrev_b32_sdwa v3, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_max_f32_e32 v3, 4.0, v3 -; GFX940-NEXT: v_bfe_u32 v5, v3, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v3 -; GFX940-NEXT: v_add3_u32 v5, v5, v3, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v3, v4, v2, v3 -; GFX940-NEXT: ds_cmpst_rtn_b32 v3, v1, v4, v3 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB14_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmax_ret_bf16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_and_b32_e32 v1, -4, v0 +; GFX942-NEXT: ds_read_b32 v3, v1 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 3, v0 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_and_b32_e32 v0, 24, v2 +; GFX942-NEXT: v_lshlrev_b32_e64 v2, v2, s0 +; GFX942-NEXT: v_not_b32_e32 v2, v2 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB14_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_lshrrev_b32_sdwa v3, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_max_f32_e32 v3, 4.0, v3 +; GFX942-NEXT: v_bfe_u32 v5, v3, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v3 +; GFX942-NEXT: v_add3_u32 v5, v5, v3, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v3, v4, v2, v3 +; GFX942-NEXT: ds_cmpst_rtn_b32 v3, v1, v4, v3 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB14_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmax_ret_bf16: ; GFX11: ; %bb.0: @@ -2911,44 +2911,44 @@ define bfloat @local_atomic_fmax_ret_bf16__offset(ptr addrspace(3) %ptr) nounwin ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v1, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmax_ret_bf16__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_u32_e32 v0, 0xfffe, v0 -; GFX940-NEXT: v_and_b32_e32 v1, -4, v0 -; GFX940-NEXT: ds_read_b32 v3, v1 -; GFX940-NEXT: v_and_b32_e32 v0, 3, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v2, v0, s0 -; GFX940-NEXT: v_not_b32_e32 v2, v2 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB15_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_lshrrev_b32_sdwa v3, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_max_f32_e32 v3, 4.0, v3 -; GFX940-NEXT: v_bfe_u32 v5, v3, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v3 -; GFX940-NEXT: v_add3_u32 v5, v5, v3, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v3, v4, v2, v3 -; GFX940-NEXT: ds_cmpst_rtn_b32 v3, v1, v4, v3 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB15_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmax_ret_bf16__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_u32_e32 v0, 0xfffe, v0 +; GFX942-NEXT: v_and_b32_e32 v1, -4, v0 +; GFX942-NEXT: ds_read_b32 v3, v1 +; GFX942-NEXT: v_and_b32_e32 v0, 3, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v2, v0, s0 +; GFX942-NEXT: v_not_b32_e32 v2, v2 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB15_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_lshrrev_b32_sdwa v3, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_max_f32_e32 v3, 4.0, v3 +; GFX942-NEXT: v_bfe_u32 v5, v3, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v3 +; GFX942-NEXT: v_add3_u32 v5, v5, v3, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v3, v4, v2, v3 +; GFX942-NEXT: ds_cmpst_rtn_b32 v3, v1, v4, v3 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB15_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmax_ret_bf16__offset: ; GFX11: ; %bb.0: @@ -3275,42 +3275,42 @@ define void @local_atomic_fmax_noret_bf16(ptr addrspace(3) %ptr) nounwind { ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmax_noret_bf16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_and_b32_e32 v1, -4, v0 -; GFX940-NEXT: ds_read_b32 v3, v1 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 3, v0 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_and_b32_e32 v0, 24, v2 -; GFX940-NEXT: v_lshlrev_b32_e64 v2, v2, s0 -; GFX940-NEXT: v_not_b32_e32 v2, v2 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB16_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v4, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_max_f32_e32 v4, 4.0, v4 -; GFX940-NEXT: v_bfe_u32 v5, v4, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v4 -; GFX940-NEXT: v_add3_u32 v5, v5, v4, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v4, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v4, v3, v2, v4 -; GFX940-NEXT: ds_cmpst_rtn_b32 v4, v1, v3, v4 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB16_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmax_noret_bf16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_and_b32_e32 v1, -4, v0 +; GFX942-NEXT: ds_read_b32 v3, v1 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 3, v0 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_and_b32_e32 v0, 24, v2 +; GFX942-NEXT: v_lshlrev_b32_e64 v2, v2, s0 +; GFX942-NEXT: v_not_b32_e32 v2, v2 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB16_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v4, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_max_f32_e32 v4, 4.0, v4 +; GFX942-NEXT: v_bfe_u32 v5, v4, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v4 +; GFX942-NEXT: v_add3_u32 v5, v5, v4, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v4, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v4, v3, v2, v4 +; GFX942-NEXT: ds_cmpst_rtn_b32 v4, v1, v3, v4 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB16_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmax_noret_bf16: ; GFX11: ; %bb.0: @@ -3620,43 +3620,43 @@ define void @local_atomic_fmax_noret_bf16__offset(ptr addrspace(3) %ptr) nounwin ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmax_noret_bf16__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_u32_e32 v1, 0xfffe, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v1 -; GFX940-NEXT: ds_read_b32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v1, 3, v1 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 3, v1 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v2, v1, s0 -; GFX940-NEXT: v_not_b32_e32 v2, v2 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB17_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_max_f32_e32 v4, 4.0, v4 -; GFX940-NEXT: v_bfe_u32 v5, v4, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v4 -; GFX940-NEXT: v_add3_u32 v5, v5, v4, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v4, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v4, v3, v2, v4 -; GFX940-NEXT: ds_cmpst_rtn_b32 v4, v0, v3, v4 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB17_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmax_noret_bf16__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_u32_e32 v1, 0xfffe, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v1 +; GFX942-NEXT: ds_read_b32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v1, 3, v1 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 3, v1 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v2, v1, s0 +; GFX942-NEXT: v_not_b32_e32 v2, v2 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB17_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_max_f32_e32 v4, 4.0, v4 +; GFX942-NEXT: v_bfe_u32 v5, v4, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v4 +; GFX942-NEXT: v_add3_u32 v5, v5, v4, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v4, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v4, v3, v2, v4 +; GFX942-NEXT: ds_cmpst_rtn_b32 v4, v0, v3, v4 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB17_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmax_noret_bf16__offset: ; GFX11: ; %bb.0: @@ -3965,37 +3965,37 @@ define bfloat @local_atomic_fmax_ret_bf16__offset__align4(ptr addrspace(3) %ptr) ; GFX12-NEXT: v_mov_b32_e32 v0, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmax_ret_bf16__offset__align4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v1, v0 offset:65534 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: s_mov_b32 s3, 0xffff0000 -; GFX940-NEXT: .LBB18_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v2 -; GFX940-NEXT: v_max_f32_e32 v1, 4.0, v1 -; GFX940-NEXT: v_bfe_u32 v3, v1, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v4, 0x400000, v1 -; GFX940-NEXT: v_add3_u32 v3, v3, v1, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v1, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: v_and_or_b32 v1, v2, s3, v1 -; GFX940-NEXT: ds_cmpst_rtn_b32 v1, v0, v2, v1 offset:65534 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB18_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmax_ret_bf16__offset__align4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v1, v0 offset:65534 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: s_mov_b32 s3, 0xffff0000 +; GFX942-NEXT: .LBB18_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v2 +; GFX942-NEXT: v_max_f32_e32 v1, 4.0, v1 +; GFX942-NEXT: v_bfe_u32 v3, v1, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v4, 0x400000, v1 +; GFX942-NEXT: v_add3_u32 v3, v3, v1, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v1, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: v_and_or_b32 v1, v2, s3, v1 +; GFX942-NEXT: ds_cmpst_rtn_b32 v1, v0, v2, v1 offset:65534 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB18_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmax_ret_bf16__offset__align4: ; GFX11: ; %bb.0: @@ -4258,36 +4258,36 @@ define void @local_atomic_fmax_noret_bf16__offset__align4(ptr addrspace(3) %ptr) ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmax_noret_bf16__offset__align4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v1, v0 offset:65534 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: s_mov_b32 s3, 0xffff0000 -; GFX940-NEXT: .LBB19_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 -; GFX940-NEXT: v_max_f32_e32 v2, 4.0, v2 -; GFX940-NEXT: v_bfe_u32 v3, v2, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v4, 0x400000, v2 -; GFX940-NEXT: v_add3_u32 v3, v3, v2, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_and_or_b32 v2, v1, s3, v2 -; GFX940-NEXT: ds_cmpst_rtn_b32 v2, v0, v1, v2 offset:65534 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB19_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmax_noret_bf16__offset__align4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v1, v0 offset:65534 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: s_mov_b32 s3, 0xffff0000 +; GFX942-NEXT: .LBB19_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1 +; GFX942-NEXT: v_max_f32_e32 v2, 4.0, v2 +; GFX942-NEXT: v_bfe_u32 v3, v2, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v4, 0x400000, v2 +; GFX942-NEXT: v_add3_u32 v3, v3, v2, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_and_or_b32 v2, v1, s3, v2 +; GFX942-NEXT: ds_cmpst_rtn_b32 v2, v0, v1, v2 offset:65534 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB19_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmax_noret_bf16__offset__align4: ; GFX11: ; %bb.0: @@ -4538,29 +4538,29 @@ define <2 x half> @local_atomic_fmax_ret_v2f16(ptr addrspace(3) %ptr, <2 x half> ; GFX12-NEXT: v_mov_b32_e32 v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmax_ret_v2f16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v2, v0 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX940-NEXT: .LBB20_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_pk_max_f16 v2, v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_max_f16 v2, v2, v1 -; GFX940-NEXT: ds_cmpst_rtn_b32 v2, v0, v3, v2 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB20_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmax_ret_v2f16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v2, v0 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX942-NEXT: .LBB20_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_pk_max_f16 v2, v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_max_f16 v2, v2, v1 +; GFX942-NEXT: ds_cmpst_rtn_b32 v2, v0, v3, v2 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB20_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmax_ret_v2f16: ; GFX11: ; %bb.0: @@ -4809,29 +4809,29 @@ define <2 x half> @local_atomic_fmax_ret_v2f16__offset(ptr addrspace(3) %ptr, <2 ; GFX12-NEXT: v_mov_b32_e32 v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmax_ret_v2f16__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v2, v0 offset:65532 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX940-NEXT: .LBB21_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_pk_max_f16 v2, v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_max_f16 v2, v2, v1 -; GFX940-NEXT: ds_cmpst_rtn_b32 v2, v0, v3, v2 offset:65532 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB21_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmax_ret_v2f16__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v2, v0 offset:65532 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX942-NEXT: .LBB21_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_pk_max_f16 v2, v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_max_f16 v2, v2, v1 +; GFX942-NEXT: ds_cmpst_rtn_b32 v2, v0, v3, v2 offset:65532 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB21_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmax_ret_v2f16__offset: ; GFX11: ; %bb.0: @@ -5079,28 +5079,28 @@ define void @local_atomic_fmax_noret_v2f16(ptr addrspace(3) %ptr, <2 x half> %va ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmax_noret_v2f16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v2, v0 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX940-NEXT: .LBB22_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v3, v2, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_max_f16 v3, v3, v1 -; GFX940-NEXT: ds_cmpst_rtn_b32 v3, v0, v2, v3 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB22_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmax_noret_v2f16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v2, v0 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX942-NEXT: .LBB22_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v3, v2, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_max_f16 v3, v3, v1 +; GFX942-NEXT: ds_cmpst_rtn_b32 v3, v0, v2, v3 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB22_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmax_noret_v2f16: ; GFX11: ; %bb.0: @@ -5340,28 +5340,28 @@ define void @local_atomic_fmax_noret_v2f16__offset(ptr addrspace(3) %ptr, <2 x h ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmax_noret_v2f16__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v2, v0 offset:65532 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX940-NEXT: .LBB23_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v3, v2, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_max_f16 v3, v3, v1 -; GFX940-NEXT: ds_cmpst_rtn_b32 v3, v0, v2, v3 offset:65532 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB23_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmax_noret_v2f16__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v2, v0 offset:65532 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX942-NEXT: .LBB23_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v3, v2, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_max_f16 v3, v3, v1 +; GFX942-NEXT: ds_cmpst_rtn_b32 v3, v0, v2, v3 offset:65532 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB23_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmax_noret_v2f16__offset: ; GFX11: ; %bb.0: @@ -5627,45 +5627,45 @@ define <2 x bfloat> @local_atomic_fmax_ret_v2bf16(ptr addrspace(3) %ptr, <2 x bf ; GFX12-NEXT: v_mov_b32_e32 v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmax_ret_v2bf16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v2, v0 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v1 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB24_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v4 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v4 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v3 -; GFX940-NEXT: v_max_f32_e32 v5, v5, v1 -; GFX940-NEXT: v_bfe_u32 v6, v2, 16, 1 -; GFX940-NEXT: v_bfe_u32 v8, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v7, 0x400000, v2 -; GFX940-NEXT: v_or_b32_e32 v9, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v2, s4 -; GFX940-NEXT: v_add3_u32 v8, v8, v5, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v2, v6, v7, s[0:1] -; GFX940-NEXT: v_perm_b32 v2, v5, v2, s5 -; GFX940-NEXT: ds_cmpst_rtn_b32 v2, v0, v4, v2 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB24_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmax_ret_v2bf16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v2, v0 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v1 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB24_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v4 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v4 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v3 +; GFX942-NEXT: v_max_f32_e32 v5, v5, v1 +; GFX942-NEXT: v_bfe_u32 v6, v2, 16, 1 +; GFX942-NEXT: v_bfe_u32 v8, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v7, 0x400000, v2 +; GFX942-NEXT: v_or_b32_e32 v9, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v2, s4 +; GFX942-NEXT: v_add3_u32 v8, v8, v5, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v2, v6, v7, s[0:1] +; GFX942-NEXT: v_perm_b32 v2, v5, v2, s5 +; GFX942-NEXT: ds_cmpst_rtn_b32 v2, v0, v4, v2 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB24_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmax_ret_v2bf16: ; GFX11: ; %bb.0: @@ -6005,45 +6005,45 @@ define <2 x bfloat> @local_atomic_fmax_ret_v2bf16__offset(ptr addrspace(3) %ptr, ; GFX12-NEXT: v_mov_b32_e32 v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmax_ret_v2bf16__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v2, v0 offset:65532 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v1 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB25_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v4 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v4 -; GFX940-NEXT: v_max_f32_e32 v2, v2, v3 -; GFX940-NEXT: v_max_f32_e32 v5, v5, v1 -; GFX940-NEXT: v_bfe_u32 v6, v2, 16, 1 -; GFX940-NEXT: v_bfe_u32 v8, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v7, 0x400000, v2 -; GFX940-NEXT: v_or_b32_e32 v9, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v2, s4 -; GFX940-NEXT: v_add3_u32 v8, v8, v5, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v2, v6, v7, s[0:1] -; GFX940-NEXT: v_perm_b32 v2, v5, v2, s5 -; GFX940-NEXT: ds_cmpst_rtn_b32 v2, v0, v4, v2 offset:65532 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB25_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmax_ret_v2bf16__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v2, v0 offset:65532 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v1 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB25_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v4 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v4 +; GFX942-NEXT: v_max_f32_e32 v2, v2, v3 +; GFX942-NEXT: v_max_f32_e32 v5, v5, v1 +; GFX942-NEXT: v_bfe_u32 v6, v2, 16, 1 +; GFX942-NEXT: v_bfe_u32 v8, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v7, 0x400000, v2 +; GFX942-NEXT: v_or_b32_e32 v9, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v2, s4 +; GFX942-NEXT: v_add3_u32 v8, v8, v5, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v2, v6, v7, s[0:1] +; GFX942-NEXT: v_perm_b32 v2, v5, v2, s5 +; GFX942-NEXT: ds_cmpst_rtn_b32 v2, v0, v4, v2 offset:65532 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB25_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmax_ret_v2bf16__offset: ; GFX11: ; %bb.0: @@ -6382,44 +6382,44 @@ define void @local_atomic_fmax_noret_v2bf16(ptr addrspace(3) %ptr, <2 x bfloat> ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmax_noret_v2bf16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v3, v0 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB26_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v3 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v3 -; GFX940-NEXT: v_max_f32_e32 v4, v4, v2 -; GFX940-NEXT: v_max_f32_e32 v5, v5, v1 -; GFX940-NEXT: v_bfe_u32 v6, v4, 16, 1 -; GFX940-NEXT: v_bfe_u32 v8, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v7, 0x400000, v4 -; GFX940-NEXT: v_or_b32_e32 v9, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v4, s4 -; GFX940-NEXT: v_add3_u32 v8, v8, v5, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v4, v4 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v4, v6, v7, s[0:1] -; GFX940-NEXT: v_perm_b32 v4, v5, v4, s5 -; GFX940-NEXT: ds_cmpst_rtn_b32 v4, v0, v3, v4 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB26_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmax_noret_v2bf16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v3, v0 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB26_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v3 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v3 +; GFX942-NEXT: v_max_f32_e32 v4, v4, v2 +; GFX942-NEXT: v_max_f32_e32 v5, v5, v1 +; GFX942-NEXT: v_bfe_u32 v6, v4, 16, 1 +; GFX942-NEXT: v_bfe_u32 v8, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v7, 0x400000, v4 +; GFX942-NEXT: v_or_b32_e32 v9, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v4, s4 +; GFX942-NEXT: v_add3_u32 v8, v8, v5, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v4, v4 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v4, v6, v7, s[0:1] +; GFX942-NEXT: v_perm_b32 v4, v5, v4, s5 +; GFX942-NEXT: ds_cmpst_rtn_b32 v4, v0, v3, v4 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB26_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmax_noret_v2bf16: ; GFX11: ; %bb.0: @@ -6747,44 +6747,44 @@ define void @local_atomic_fmax_noret_v2bf16__ofset(ptr addrspace(3) %ptr, <2 x b ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmax_noret_v2bf16__ofset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v3, v0 offset:65532 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB27_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v3 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v3 -; GFX940-NEXT: v_max_f32_e32 v4, v4, v2 -; GFX940-NEXT: v_max_f32_e32 v5, v5, v1 -; GFX940-NEXT: v_bfe_u32 v6, v4, 16, 1 -; GFX940-NEXT: v_bfe_u32 v8, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v7, 0x400000, v4 -; GFX940-NEXT: v_or_b32_e32 v9, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v4, s4 -; GFX940-NEXT: v_add3_u32 v8, v8, v5, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v4, v4 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v4, v6, v7, s[0:1] -; GFX940-NEXT: v_perm_b32 v4, v5, v4, s5 -; GFX940-NEXT: ds_cmpst_rtn_b32 v4, v0, v3, v4 offset:65532 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB27_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmax_noret_v2bf16__ofset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v3, v0 offset:65532 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB27_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v3 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v3 +; GFX942-NEXT: v_max_f32_e32 v4, v4, v2 +; GFX942-NEXT: v_max_f32_e32 v5, v5, v1 +; GFX942-NEXT: v_bfe_u32 v6, v4, 16, 1 +; GFX942-NEXT: v_bfe_u32 v8, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v7, 0x400000, v4 +; GFX942-NEXT: v_or_b32_e32 v9, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v4, s4 +; GFX942-NEXT: v_add3_u32 v8, v8, v5, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v4, v4 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v4, v6, v7, s[0:1] +; GFX942-NEXT: v_perm_b32 v4, v5, v4, s5 +; GFX942-NEXT: ds_cmpst_rtn_b32 v4, v0, v3, v4 offset:65532 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB27_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmax_noret_v2bf16__ofset: ; GFX11: ; %bb.0: @@ -7083,13 +7083,13 @@ define float @local_atomic_fmax_ret_f32__amdgpu_ignore_denormal_mode(ptr addrspa ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmax_ret_f32__amdgpu_ignore_denormal_mode: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, 4.0 -; GFX940-NEXT: ds_max_rtn_f32 v0, v0, v1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmax_ret_f32__amdgpu_ignore_denormal_mode: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, 4.0 +; GFX942-NEXT: ds_max_rtn_f32 v0, v0, v1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmax_ret_f32__amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: @@ -7172,13 +7172,13 @@ define void @local_atomic_fmax_noret_f32__amdgpu_ignore_denormal_mode(ptr addrsp ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmax_noret_f32__amdgpu_ignore_denormal_mode: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, 4.0 -; GFX940-NEXT: ds_max_f32 v0, v1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmax_noret_f32__amdgpu_ignore_denormal_mode: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, 4.0 +; GFX942-NEXT: ds_max_f32 v0, v1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmax_noret_f32__amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/local-atomicrmw-fmin.ll b/llvm/test/CodeGen/AMDGPU/local-atomicrmw-fmin.ll index 7249b0b1fc0e3..bf56496e98690 100644 --- a/llvm/test/CodeGen/AMDGPU/local-atomicrmw-fmin.ll +++ b/llvm/test/CodeGen/AMDGPU/local-atomicrmw-fmin.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s -; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s @@ -28,13 +28,13 @@ define float @local_atomic_fmin_ret_f32(ptr addrspace(3) %ptr) nounwind { ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmin_ret_f32: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, 4.0 -; GFX940-NEXT: ds_min_rtn_f32 v0, v0, v1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmin_ret_f32: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, 4.0 +; GFX942-NEXT: ds_min_rtn_f32 v0, v0, v1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmin_ret_f32: ; GFX11: ; %bb.0: @@ -117,13 +117,13 @@ define float @local_atomic_fmin_ret_f32__offset(ptr addrspace(3) %ptr) nounwind ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmin_ret_f32__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, 4.0 -; GFX940-NEXT: ds_min_rtn_f32 v0, v0, v1 offset:65532 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmin_ret_f32__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, 4.0 +; GFX942-NEXT: ds_min_rtn_f32 v0, v0, v1 offset:65532 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmin_ret_f32__offset: ; GFX11: ; %bb.0: @@ -208,13 +208,13 @@ define void @local_atomic_fmin_noret_f32(ptr addrspace(3) %ptr) nounwind { ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmin_noret_f32: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, 4.0 -; GFX940-NEXT: ds_min_f32 v0, v1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmin_noret_f32: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, 4.0 +; GFX942-NEXT: ds_min_f32 v0, v1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmin_noret_f32: ; GFX11: ; %bb.0: @@ -297,13 +297,13 @@ define void @local_atomic_fmin_noret_f32__offset(ptr addrspace(3) %ptr) nounwind ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmin_noret_f32__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, 4.0 -; GFX940-NEXT: ds_min_f32 v0, v1 offset:65532 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmin_noret_f32__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, 4.0 +; GFX942-NEXT: ds_min_f32 v0, v1 offset:65532 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmin_noret_f32__offset: ; GFX11: ; %bb.0: @@ -393,13 +393,13 @@ define double @local_atomic_fmin_ret_f64(ptr addrspace(3) %ptr) nounwind { ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmin_ret_f64: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 -; GFX940-NEXT: ds_min_rtn_f64 v[0:1], v0, v[2:3] -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmin_ret_f64: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX942-NEXT: ds_min_rtn_f64 v[0:1], v0, v[2:3] +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmin_ret_f64: ; GFX11: ; %bb.0: @@ -490,13 +490,13 @@ define double @local_atomic_fmin_ret_f64__offset(ptr addrspace(3) %ptr) nounwind ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmin_ret_f64__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 -; GFX940-NEXT: ds_min_rtn_f64 v[0:1], v0, v[2:3] offset:65528 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmin_ret_f64__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX942-NEXT: ds_min_rtn_f64 v[0:1], v0, v[2:3] offset:65528 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmin_ret_f64__offset: ; GFX11: ; %bb.0: @@ -589,13 +589,13 @@ define void @local_atomic_fmin_noret_f64(ptr addrspace(3) %ptr) nounwind { ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmin_noret_f64: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 -; GFX940-NEXT: ds_min_f64 v0, v[2:3] -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmin_noret_f64: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX942-NEXT: ds_min_f64 v0, v[2:3] +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmin_noret_f64: ; GFX11: ; %bb.0: @@ -686,13 +686,13 @@ define void @local_atomic_fmin_noret_f64__offset(ptr addrspace(3) %ptr) nounwind ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmin_noret_f64__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 -; GFX940-NEXT: ds_min_f64 v0, v[2:3] offset:65528 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmin_noret_f64__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0 +; GFX942-NEXT: ds_min_f64 v0, v[2:3] offset:65528 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmin_noret_f64__offset: ; GFX11: ; %bb.0: @@ -817,36 +817,36 @@ define half @local_atomic_fmin_ret_f16(ptr addrspace(3) %ptr) nounwind { ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmin_ret_f16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_and_b32_e32 v1, -4, v0 -; GFX940-NEXT: ds_read_b32 v3, v1 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 3, v0 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_and_b32_e32 v0, 24, v2 -; GFX940-NEXT: v_lshlrev_b32_e64 v2, v2, s0 -; GFX940-NEXT: v_not_b32_e32 v2, v2 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB8_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_lshrrev_b32_e32 v3, v0, v4 -; GFX940-NEXT: v_max_f16_e32 v3, v3, v3 -; GFX940-NEXT: v_min_f16_e32 v3, 4.0, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, v0, v3 -; GFX940-NEXT: v_and_or_b32 v3, v4, v2, v3 -; GFX940-NEXT: ds_cmpst_rtn_b32 v3, v1, v4, v3 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB8_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmin_ret_f16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_and_b32_e32 v1, -4, v0 +; GFX942-NEXT: ds_read_b32 v3, v1 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 3, v0 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_and_b32_e32 v0, 24, v2 +; GFX942-NEXT: v_lshlrev_b32_e64 v2, v2, s0 +; GFX942-NEXT: v_not_b32_e32 v2, v2 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB8_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_lshrrev_b32_e32 v3, v0, v4 +; GFX942-NEXT: v_max_f16_e32 v3, v3, v3 +; GFX942-NEXT: v_min_f16_e32 v3, 4.0, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, v0, v3 +; GFX942-NEXT: v_and_or_b32 v3, v4, v2, v3 +; GFX942-NEXT: ds_cmpst_rtn_b32 v3, v1, v4, v3 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB8_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmin_ret_f16: ; GFX11: ; %bb.0: @@ -1130,37 +1130,37 @@ define half @local_atomic_fmin_ret_f16__offset(ptr addrspace(3) %ptr) nounwind { ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v1, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmin_ret_f16__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_u32_e32 v0, 0xfffe, v0 -; GFX940-NEXT: v_and_b32_e32 v1, -4, v0 -; GFX940-NEXT: ds_read_b32 v3, v1 -; GFX940-NEXT: v_and_b32_e32 v0, 3, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v2, v0, s0 -; GFX940-NEXT: v_not_b32_e32 v2, v2 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB9_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_lshrrev_b32_e32 v3, v0, v4 -; GFX940-NEXT: v_max_f16_e32 v3, v3, v3 -; GFX940-NEXT: v_min_f16_e32 v3, 4.0, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, v0, v3 -; GFX940-NEXT: v_and_or_b32 v3, v4, v2, v3 -; GFX940-NEXT: ds_cmpst_rtn_b32 v3, v1, v4, v3 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB9_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmin_ret_f16__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_u32_e32 v0, 0xfffe, v0 +; GFX942-NEXT: v_and_b32_e32 v1, -4, v0 +; GFX942-NEXT: ds_read_b32 v3, v1 +; GFX942-NEXT: v_and_b32_e32 v0, 3, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v2, v0, s0 +; GFX942-NEXT: v_not_b32_e32 v2, v2 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB9_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_lshrrev_b32_e32 v3, v0, v4 +; GFX942-NEXT: v_max_f16_e32 v3, v3, v3 +; GFX942-NEXT: v_min_f16_e32 v3, 4.0, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, v0, v3 +; GFX942-NEXT: v_and_or_b32 v3, v4, v2, v3 +; GFX942-NEXT: ds_cmpst_rtn_b32 v3, v1, v4, v3 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB9_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmin_ret_f16__offset: ; GFX11: ; %bb.0: @@ -1450,35 +1450,35 @@ define void @local_atomic_fmin_noret_f16(ptr addrspace(3) %ptr) nounwind { ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmin_noret_f16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_and_b32_e32 v1, -4, v0 -; GFX940-NEXT: ds_read_b32 v3, v1 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 3, v0 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_and_b32_e32 v0, 24, v2 -; GFX940-NEXT: v_lshlrev_b32_e64 v2, v2, s0 -; GFX940-NEXT: v_not_b32_e32 v2, v2 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB10_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v0, v3 -; GFX940-NEXT: v_max_f16_e32 v4, v4, v4 -; GFX940-NEXT: v_min_f16_e32 v4, 4.0, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v0, v4 -; GFX940-NEXT: v_and_or_b32 v4, v3, v2, v4 -; GFX940-NEXT: ds_cmpst_rtn_b32 v4, v1, v3, v4 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB10_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmin_noret_f16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_and_b32_e32 v1, -4, v0 +; GFX942-NEXT: ds_read_b32 v3, v1 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 3, v0 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_and_b32_e32 v0, 24, v2 +; GFX942-NEXT: v_lshlrev_b32_e64 v2, v2, s0 +; GFX942-NEXT: v_not_b32_e32 v2, v2 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB10_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v0, v3 +; GFX942-NEXT: v_max_f16_e32 v4, v4, v4 +; GFX942-NEXT: v_min_f16_e32 v4, 4.0, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v0, v4 +; GFX942-NEXT: v_and_or_b32 v4, v3, v2, v4 +; GFX942-NEXT: ds_cmpst_rtn_b32 v4, v1, v3, v4 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB10_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmin_noret_f16: ; GFX11: ; %bb.0: @@ -1752,36 +1752,36 @@ define void @local_atomic_fmin_noret_f16__offset(ptr addrspace(3) %ptr) nounwind ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmin_noret_f16__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_u32_e32 v1, 0xfffe, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v1 -; GFX940-NEXT: ds_read_b32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v1, 3, v1 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 3, v1 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v2, v1, s0 -; GFX940-NEXT: v_not_b32_e32 v2, v2 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB11_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v1, v3 -; GFX940-NEXT: v_max_f16_e32 v4, v4, v4 -; GFX940-NEXT: v_min_f16_e32 v4, 4.0, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v1, v4 -; GFX940-NEXT: v_and_or_b32 v4, v3, v2, v4 -; GFX940-NEXT: ds_cmpst_rtn_b32 v4, v0, v3, v4 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB11_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmin_noret_f16__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_u32_e32 v1, 0xfffe, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v1 +; GFX942-NEXT: ds_read_b32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v1, 3, v1 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 3, v1 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v2, v1, s0 +; GFX942-NEXT: v_not_b32_e32 v2, v2 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB11_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v1, v3 +; GFX942-NEXT: v_max_f16_e32 v4, v4, v4 +; GFX942-NEXT: v_min_f16_e32 v4, 4.0, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v1, v4 +; GFX942-NEXT: v_and_or_b32 v4, v3, v2, v4 +; GFX942-NEXT: ds_cmpst_rtn_b32 v4, v0, v3, v4 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB11_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmin_noret_f16__offset: ; GFX11: ; %bb.0: @@ -2054,29 +2054,29 @@ define half @local_atomic_fmin_ret_f16__offset__align4(ptr addrspace(3) %ptr) no ; GFX12-NEXT: v_mov_b32_e32 v0, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmin_ret_f16__offset__align4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v1, v0 offset:65534 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_mov_b32 s2, 0xffff0000 -; GFX940-NEXT: .LBB12_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_max_f16_e32 v1, v2, v2 -; GFX940-NEXT: v_min_f16_e32 v1, 4.0, v1 -; GFX940-NEXT: v_and_or_b32 v1, v2, s2, v1 -; GFX940-NEXT: ds_cmpst_rtn_b32 v1, v0, v2, v1 offset:65534 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB12_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmin_ret_f16__offset__align4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v1, v0 offset:65534 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_mov_b32 s2, 0xffff0000 +; GFX942-NEXT: .LBB12_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_max_f16_e32 v1, v2, v2 +; GFX942-NEXT: v_min_f16_e32 v1, 4.0, v1 +; GFX942-NEXT: v_and_or_b32 v1, v2, s2, v1 +; GFX942-NEXT: ds_cmpst_rtn_b32 v1, v0, v2, v1 offset:65534 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB12_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmin_ret_f16__offset__align4: ; GFX11: ; %bb.0: @@ -2297,28 +2297,28 @@ define void @local_atomic_fmin_noret_f16__offset__align4(ptr addrspace(3) %ptr) ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmin_noret_f16__offset__align4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v1, v0 offset:65534 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_mov_b32 s2, 0xffff0000 -; GFX940-NEXT: .LBB13_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_max_f16_e32 v2, v1, v1 -; GFX940-NEXT: v_min_f16_e32 v2, 4.0, v2 -; GFX940-NEXT: v_and_or_b32 v2, v1, s2, v2 -; GFX940-NEXT: ds_cmpst_rtn_b32 v2, v0, v1, v2 offset:65534 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB13_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmin_noret_f16__offset__align4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v1, v0 offset:65534 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_mov_b32 s2, 0xffff0000 +; GFX942-NEXT: .LBB13_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_max_f16_e32 v2, v1, v1 +; GFX942-NEXT: v_min_f16_e32 v2, 4.0, v2 +; GFX942-NEXT: v_and_or_b32 v2, v1, s2, v2 +; GFX942-NEXT: ds_cmpst_rtn_b32 v2, v0, v1, v2 offset:65534 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB13_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmin_noret_f16__offset__align4: ; GFX11: ; %bb.0: @@ -2554,43 +2554,43 @@ define bfloat @local_atomic_fmin_ret_bf16(ptr addrspace(3) %ptr) nounwind { ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmin_ret_bf16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_and_b32_e32 v1, -4, v0 -; GFX940-NEXT: ds_read_b32 v3, v1 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 3, v0 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_and_b32_e32 v0, 24, v2 -; GFX940-NEXT: v_lshlrev_b32_e64 v2, v2, s0 -; GFX940-NEXT: v_not_b32_e32 v2, v2 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB14_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_lshrrev_b32_sdwa v3, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_min_f32_e32 v3, 4.0, v3 -; GFX940-NEXT: v_bfe_u32 v5, v3, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v3 -; GFX940-NEXT: v_add3_u32 v5, v5, v3, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v3, v4, v2, v3 -; GFX940-NEXT: ds_cmpst_rtn_b32 v3, v1, v4, v3 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB14_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmin_ret_bf16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_and_b32_e32 v1, -4, v0 +; GFX942-NEXT: ds_read_b32 v3, v1 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 3, v0 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_and_b32_e32 v0, 24, v2 +; GFX942-NEXT: v_lshlrev_b32_e64 v2, v2, s0 +; GFX942-NEXT: v_not_b32_e32 v2, v2 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB14_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_lshrrev_b32_sdwa v3, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_min_f32_e32 v3, 4.0, v3 +; GFX942-NEXT: v_bfe_u32 v5, v3, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v3 +; GFX942-NEXT: v_add3_u32 v5, v5, v3, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v3, v4, v2, v3 +; GFX942-NEXT: ds_cmpst_rtn_b32 v3, v1, v4, v3 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB14_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmin_ret_bf16: ; GFX11: ; %bb.0: @@ -2911,44 +2911,44 @@ define bfloat @local_atomic_fmin_ret_bf16__offset(ptr addrspace(3) %ptr) nounwin ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v1, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmin_ret_bf16__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_u32_e32 v0, 0xfffe, v0 -; GFX940-NEXT: v_and_b32_e32 v1, -4, v0 -; GFX940-NEXT: ds_read_b32 v3, v1 -; GFX940-NEXT: v_and_b32_e32 v0, 3, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v2, v0, s0 -; GFX940-NEXT: v_not_b32_e32 v2, v2 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB15_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_lshrrev_b32_sdwa v3, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_min_f32_e32 v3, 4.0, v3 -; GFX940-NEXT: v_bfe_u32 v5, v3, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v3 -; GFX940-NEXT: v_add3_u32 v5, v5, v3, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v3, v4, v2, v3 -; GFX940-NEXT: ds_cmpst_rtn_b32 v3, v1, v4, v3 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB15_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmin_ret_bf16__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_u32_e32 v0, 0xfffe, v0 +; GFX942-NEXT: v_and_b32_e32 v1, -4, v0 +; GFX942-NEXT: ds_read_b32 v3, v1 +; GFX942-NEXT: v_and_b32_e32 v0, 3, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v2, v0, s0 +; GFX942-NEXT: v_not_b32_e32 v2, v2 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB15_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_lshrrev_b32_sdwa v3, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_min_f32_e32 v3, 4.0, v3 +; GFX942-NEXT: v_bfe_u32 v5, v3, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v3 +; GFX942-NEXT: v_add3_u32 v5, v5, v3, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v3, v4, v2, v3 +; GFX942-NEXT: ds_cmpst_rtn_b32 v3, v1, v4, v3 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB15_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmin_ret_bf16__offset: ; GFX11: ; %bb.0: @@ -3275,42 +3275,42 @@ define void @local_atomic_fmin_noret_bf16(ptr addrspace(3) %ptr) nounwind { ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmin_noret_bf16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_and_b32_e32 v1, -4, v0 -; GFX940-NEXT: ds_read_b32 v3, v1 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 3, v0 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_and_b32_e32 v0, 24, v2 -; GFX940-NEXT: v_lshlrev_b32_e64 v2, v2, s0 -; GFX940-NEXT: v_not_b32_e32 v2, v2 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB16_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v4, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_min_f32_e32 v4, 4.0, v4 -; GFX940-NEXT: v_bfe_u32 v5, v4, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v4 -; GFX940-NEXT: v_add3_u32 v5, v5, v4, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v4, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v4, v3, v2, v4 -; GFX940-NEXT: ds_cmpst_rtn_b32 v4, v1, v3, v4 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB16_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmin_noret_bf16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_and_b32_e32 v1, -4, v0 +; GFX942-NEXT: ds_read_b32 v3, v1 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 3, v0 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_and_b32_e32 v0, 24, v2 +; GFX942-NEXT: v_lshlrev_b32_e64 v2, v2, s0 +; GFX942-NEXT: v_not_b32_e32 v2, v2 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB16_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v4, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_min_f32_e32 v4, 4.0, v4 +; GFX942-NEXT: v_bfe_u32 v5, v4, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v4 +; GFX942-NEXT: v_add3_u32 v5, v5, v4, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v4, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v4, v3, v2, v4 +; GFX942-NEXT: ds_cmpst_rtn_b32 v4, v1, v3, v4 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB16_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmin_noret_bf16: ; GFX11: ; %bb.0: @@ -3620,43 +3620,43 @@ define void @local_atomic_fmin_noret_bf16__offset(ptr addrspace(3) %ptr) nounwin ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmin_noret_bf16__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_u32_e32 v1, 0xfffe, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v1 -; GFX940-NEXT: ds_read_b32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v1, 3, v1 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 3, v1 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v2, v1, s0 -; GFX940-NEXT: v_not_b32_e32 v2, v2 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB17_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_min_f32_e32 v4, 4.0, v4 -; GFX940-NEXT: v_bfe_u32 v5, v4, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v4 -; GFX940-NEXT: v_add3_u32 v5, v5, v4, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v4, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v4, v3, v2, v4 -; GFX940-NEXT: ds_cmpst_rtn_b32 v4, v0, v3, v4 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB17_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmin_noret_bf16__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_u32_e32 v1, 0xfffe, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v1 +; GFX942-NEXT: ds_read_b32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v1, 3, v1 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 3, v1 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v2, v1, s0 +; GFX942-NEXT: v_not_b32_e32 v2, v2 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB17_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_min_f32_e32 v4, 4.0, v4 +; GFX942-NEXT: v_bfe_u32 v5, v4, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v4 +; GFX942-NEXT: v_add3_u32 v5, v5, v4, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v4, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v4, v3, v2, v4 +; GFX942-NEXT: ds_cmpst_rtn_b32 v4, v0, v3, v4 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB17_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmin_noret_bf16__offset: ; GFX11: ; %bb.0: @@ -3965,37 +3965,37 @@ define bfloat @local_atomic_fmin_ret_bf16__offset__align4(ptr addrspace(3) %ptr) ; GFX12-NEXT: v_mov_b32_e32 v0, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmin_ret_bf16__offset__align4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v1, v0 offset:65534 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: s_mov_b32 s3, 0xffff0000 -; GFX940-NEXT: .LBB18_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v2 -; GFX940-NEXT: v_min_f32_e32 v1, 4.0, v1 -; GFX940-NEXT: v_bfe_u32 v3, v1, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v4, 0x400000, v1 -; GFX940-NEXT: v_add3_u32 v3, v3, v1, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v1, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: v_and_or_b32 v1, v2, s3, v1 -; GFX940-NEXT: ds_cmpst_rtn_b32 v1, v0, v2, v1 offset:65534 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB18_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmin_ret_bf16__offset__align4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v1, v0 offset:65534 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: s_mov_b32 s3, 0xffff0000 +; GFX942-NEXT: .LBB18_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v2 +; GFX942-NEXT: v_min_f32_e32 v1, 4.0, v1 +; GFX942-NEXT: v_bfe_u32 v3, v1, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v4, 0x400000, v1 +; GFX942-NEXT: v_add3_u32 v3, v3, v1, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v1, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: v_and_or_b32 v1, v2, s3, v1 +; GFX942-NEXT: ds_cmpst_rtn_b32 v1, v0, v2, v1 offset:65534 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB18_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmin_ret_bf16__offset__align4: ; GFX11: ; %bb.0: @@ -4258,36 +4258,36 @@ define void @local_atomic_fmin_noret_bf16__offset__align4(ptr addrspace(3) %ptr) ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmin_noret_bf16__offset__align4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v1, v0 offset:65534 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: s_mov_b32 s3, 0xffff0000 -; GFX940-NEXT: .LBB19_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 -; GFX940-NEXT: v_min_f32_e32 v2, 4.0, v2 -; GFX940-NEXT: v_bfe_u32 v3, v2, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v4, 0x400000, v2 -; GFX940-NEXT: v_add3_u32 v3, v3, v2, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_and_or_b32 v2, v1, s3, v2 -; GFX940-NEXT: ds_cmpst_rtn_b32 v2, v0, v1, v2 offset:65534 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB19_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmin_noret_bf16__offset__align4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v1, v0 offset:65534 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: s_mov_b32 s3, 0xffff0000 +; GFX942-NEXT: .LBB19_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1 +; GFX942-NEXT: v_min_f32_e32 v2, 4.0, v2 +; GFX942-NEXT: v_bfe_u32 v3, v2, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v4, 0x400000, v2 +; GFX942-NEXT: v_add3_u32 v3, v3, v2, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_and_or_b32 v2, v1, s3, v2 +; GFX942-NEXT: ds_cmpst_rtn_b32 v2, v0, v1, v2 offset:65534 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB19_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmin_noret_bf16__offset__align4: ; GFX11: ; %bb.0: @@ -4538,29 +4538,29 @@ define <2 x half> @local_atomic_fmin_ret_v2f16(ptr addrspace(3) %ptr, <2 x half> ; GFX12-NEXT: v_mov_b32_e32 v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmin_ret_v2f16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v2, v0 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX940-NEXT: .LBB20_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_pk_max_f16 v2, v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_min_f16 v2, v2, v1 -; GFX940-NEXT: ds_cmpst_rtn_b32 v2, v0, v3, v2 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB20_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmin_ret_v2f16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v2, v0 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX942-NEXT: .LBB20_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_pk_max_f16 v2, v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_min_f16 v2, v2, v1 +; GFX942-NEXT: ds_cmpst_rtn_b32 v2, v0, v3, v2 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB20_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmin_ret_v2f16: ; GFX11: ; %bb.0: @@ -4809,29 +4809,29 @@ define <2 x half> @local_atomic_fmin_ret_v2f16__offset(ptr addrspace(3) %ptr, <2 ; GFX12-NEXT: v_mov_b32_e32 v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmin_ret_v2f16__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v2, v0 offset:65532 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX940-NEXT: .LBB21_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_pk_max_f16 v2, v3, v3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_min_f16 v2, v2, v1 -; GFX940-NEXT: ds_cmpst_rtn_b32 v2, v0, v3, v2 offset:65532 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB21_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmin_ret_v2f16__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v2, v0 offset:65532 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX942-NEXT: .LBB21_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_pk_max_f16 v2, v3, v3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_min_f16 v2, v2, v1 +; GFX942-NEXT: ds_cmpst_rtn_b32 v2, v0, v3, v2 offset:65532 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB21_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmin_ret_v2f16__offset: ; GFX11: ; %bb.0: @@ -5079,28 +5079,28 @@ define void @local_atomic_fmin_noret_v2f16(ptr addrspace(3) %ptr, <2 x half> %va ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmin_noret_v2f16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v2, v0 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX940-NEXT: .LBB22_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v3, v2, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_min_f16 v3, v3, v1 -; GFX940-NEXT: ds_cmpst_rtn_b32 v3, v0, v2, v3 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB22_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmin_noret_v2f16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v2, v0 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX942-NEXT: .LBB22_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v3, v2, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_min_f16 v3, v3, v1 +; GFX942-NEXT: ds_cmpst_rtn_b32 v3, v0, v2, v3 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB22_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmin_noret_v2f16: ; GFX11: ; %bb.0: @@ -5340,28 +5340,28 @@ define void @local_atomic_fmin_noret_v2f16__offset(ptr addrspace(3) %ptr, <2 x h ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmin_noret_v2f16__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v2, v0 offset:65532 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX940-NEXT: .LBB23_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_pk_max_f16 v3, v2, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_min_f16 v3, v3, v1 -; GFX940-NEXT: ds_cmpst_rtn_b32 v3, v0, v2, v3 offset:65532 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB23_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmin_noret_v2f16__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v2, v0 offset:65532 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX942-NEXT: .LBB23_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_pk_max_f16 v3, v2, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_min_f16 v3, v3, v1 +; GFX942-NEXT: ds_cmpst_rtn_b32 v3, v0, v2, v3 offset:65532 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB23_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmin_noret_v2f16__offset: ; GFX11: ; %bb.0: @@ -5627,45 +5627,45 @@ define <2 x bfloat> @local_atomic_fmin_ret_v2bf16(ptr addrspace(3) %ptr, <2 x bf ; GFX12-NEXT: v_mov_b32_e32 v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmin_ret_v2bf16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v2, v0 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v1 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB24_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v4 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v4 -; GFX940-NEXT: v_min_f32_e32 v2, v2, v3 -; GFX940-NEXT: v_min_f32_e32 v5, v5, v1 -; GFX940-NEXT: v_bfe_u32 v6, v2, 16, 1 -; GFX940-NEXT: v_bfe_u32 v8, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v7, 0x400000, v2 -; GFX940-NEXT: v_or_b32_e32 v9, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v2, s4 -; GFX940-NEXT: v_add3_u32 v8, v8, v5, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v2, v6, v7, s[0:1] -; GFX940-NEXT: v_perm_b32 v2, v5, v2, s5 -; GFX940-NEXT: ds_cmpst_rtn_b32 v2, v0, v4, v2 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB24_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmin_ret_v2bf16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v2, v0 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v1 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB24_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v4 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v4 +; GFX942-NEXT: v_min_f32_e32 v2, v2, v3 +; GFX942-NEXT: v_min_f32_e32 v5, v5, v1 +; GFX942-NEXT: v_bfe_u32 v6, v2, 16, 1 +; GFX942-NEXT: v_bfe_u32 v8, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v7, 0x400000, v2 +; GFX942-NEXT: v_or_b32_e32 v9, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v2, s4 +; GFX942-NEXT: v_add3_u32 v8, v8, v5, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v2, v6, v7, s[0:1] +; GFX942-NEXT: v_perm_b32 v2, v5, v2, s5 +; GFX942-NEXT: ds_cmpst_rtn_b32 v2, v0, v4, v2 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB24_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmin_ret_v2bf16: ; GFX11: ; %bb.0: @@ -6005,45 +6005,45 @@ define <2 x bfloat> @local_atomic_fmin_ret_v2bf16__offset(ptr addrspace(3) %ptr, ; GFX12-NEXT: v_mov_b32_e32 v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmin_ret_v2bf16__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v2, v0 offset:65532 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v1 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB25_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v4 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v4 -; GFX940-NEXT: v_min_f32_e32 v2, v2, v3 -; GFX940-NEXT: v_min_f32_e32 v5, v5, v1 -; GFX940-NEXT: v_bfe_u32 v6, v2, 16, 1 -; GFX940-NEXT: v_bfe_u32 v8, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v7, 0x400000, v2 -; GFX940-NEXT: v_or_b32_e32 v9, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v2, s4 -; GFX940-NEXT: v_add3_u32 v8, v8, v5, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v2, v6, v7, s[0:1] -; GFX940-NEXT: v_perm_b32 v2, v5, v2, s5 -; GFX940-NEXT: ds_cmpst_rtn_b32 v2, v0, v4, v2 offset:65532 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB25_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmin_ret_v2bf16__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v2, v0 offset:65532 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v1 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB25_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v4 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v4 +; GFX942-NEXT: v_min_f32_e32 v2, v2, v3 +; GFX942-NEXT: v_min_f32_e32 v5, v5, v1 +; GFX942-NEXT: v_bfe_u32 v6, v2, 16, 1 +; GFX942-NEXT: v_bfe_u32 v8, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v7, 0x400000, v2 +; GFX942-NEXT: v_or_b32_e32 v9, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v2, s4 +; GFX942-NEXT: v_add3_u32 v8, v8, v5, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v2, v6, v7, s[0:1] +; GFX942-NEXT: v_perm_b32 v2, v5, v2, s5 +; GFX942-NEXT: ds_cmpst_rtn_b32 v2, v0, v4, v2 offset:65532 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB25_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmin_ret_v2bf16__offset: ; GFX11: ; %bb.0: @@ -6382,44 +6382,44 @@ define void @local_atomic_fmin_noret_v2bf16(ptr addrspace(3) %ptr, <2 x bfloat> ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmin_noret_v2bf16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v3, v0 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB26_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v3 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v3 -; GFX940-NEXT: v_min_f32_e32 v4, v4, v2 -; GFX940-NEXT: v_min_f32_e32 v5, v5, v1 -; GFX940-NEXT: v_bfe_u32 v6, v4, 16, 1 -; GFX940-NEXT: v_bfe_u32 v8, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v7, 0x400000, v4 -; GFX940-NEXT: v_or_b32_e32 v9, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v4, s4 -; GFX940-NEXT: v_add3_u32 v8, v8, v5, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v4, v4 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v4, v6, v7, s[0:1] -; GFX940-NEXT: v_perm_b32 v4, v5, v4, s5 -; GFX940-NEXT: ds_cmpst_rtn_b32 v4, v0, v3, v4 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB26_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmin_noret_v2bf16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v3, v0 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB26_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v3 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v3 +; GFX942-NEXT: v_min_f32_e32 v4, v4, v2 +; GFX942-NEXT: v_min_f32_e32 v5, v5, v1 +; GFX942-NEXT: v_bfe_u32 v6, v4, 16, 1 +; GFX942-NEXT: v_bfe_u32 v8, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v7, 0x400000, v4 +; GFX942-NEXT: v_or_b32_e32 v9, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v4, s4 +; GFX942-NEXT: v_add3_u32 v8, v8, v5, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v4, v4 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v4, v6, v7, s[0:1] +; GFX942-NEXT: v_perm_b32 v4, v5, v4, s5 +; GFX942-NEXT: ds_cmpst_rtn_b32 v4, v0, v3, v4 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB26_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmin_noret_v2bf16: ; GFX11: ; %bb.0: @@ -6747,44 +6747,44 @@ define void @local_atomic_fmin_noret_v2bf16__ofset(ptr addrspace(3) %ptr, <2 x b ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmin_noret_v2bf16__ofset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v3, v0 offset:65532 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB27_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v3 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v3 -; GFX940-NEXT: v_min_f32_e32 v4, v4, v2 -; GFX940-NEXT: v_min_f32_e32 v5, v5, v1 -; GFX940-NEXT: v_bfe_u32 v6, v4, 16, 1 -; GFX940-NEXT: v_bfe_u32 v8, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v7, 0x400000, v4 -; GFX940-NEXT: v_or_b32_e32 v9, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v4, s4 -; GFX940-NEXT: v_add3_u32 v8, v8, v5, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v4, v4 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v4, v6, v7, s[0:1] -; GFX940-NEXT: v_perm_b32 v4, v5, v4, s5 -; GFX940-NEXT: ds_cmpst_rtn_b32 v4, v0, v3, v4 offset:65532 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB27_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmin_noret_v2bf16__ofset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v3, v0 offset:65532 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB27_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v3 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v3 +; GFX942-NEXT: v_min_f32_e32 v4, v4, v2 +; GFX942-NEXT: v_min_f32_e32 v5, v5, v1 +; GFX942-NEXT: v_bfe_u32 v6, v4, 16, 1 +; GFX942-NEXT: v_bfe_u32 v8, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v7, 0x400000, v4 +; GFX942-NEXT: v_or_b32_e32 v9, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v4, s4 +; GFX942-NEXT: v_add3_u32 v8, v8, v5, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v4, v4 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v4, v6, v7, s[0:1] +; GFX942-NEXT: v_perm_b32 v4, v5, v4, s5 +; GFX942-NEXT: ds_cmpst_rtn_b32 v4, v0, v3, v4 offset:65532 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB27_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmin_noret_v2bf16__ofset: ; GFX11: ; %bb.0: @@ -7083,13 +7083,13 @@ define float @local_atomic_fmin_ret_f32__amdgpu_ignore_denormal_mode(ptr addrspa ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmin_ret_f32__amdgpu_ignore_denormal_mode: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, 4.0 -; GFX940-NEXT: ds_min_rtn_f32 v0, v0, v1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmin_ret_f32__amdgpu_ignore_denormal_mode: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, 4.0 +; GFX942-NEXT: ds_min_rtn_f32 v0, v0, v1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmin_ret_f32__amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: @@ -7172,13 +7172,13 @@ define void @local_atomic_fmin_noret_f32__amdgpu_ignore_denormal_mode(ptr addrsp ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fmin_noret_f32__amdgpu_ignore_denormal_mode: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, 4.0 -; GFX940-NEXT: ds_min_f32 v0, v1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fmin_noret_f32__amdgpu_ignore_denormal_mode: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, 4.0 +; GFX942-NEXT: ds_min_f32 v0, v1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fmin_noret_f32__amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/local-atomicrmw-fsub.ll b/llvm/test/CodeGen/AMDGPU/local-atomicrmw-fsub.ll index 65e00c50292dc..fffdc16e1a501 100644 --- a/llvm/test/CodeGen/AMDGPU/local-atomicrmw-fsub.ll +++ b/llvm/test/CodeGen/AMDGPU/local-atomicrmw-fsub.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s -; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s @@ -44,26 +44,26 @@ define float @local_atomic_fsub_ret_f32(ptr addrspace(3) %ptr) nounwind { ; GFX12-NEXT: v_mov_b32_e32 v0, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fsub_ret_f32: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v1, v0 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB0_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_add_f32_e32 v1, -4.0, v2 -; GFX940-NEXT: ds_cmpst_rtn_b32 v1, v0, v2, v1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB0_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fsub_ret_f32: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v1, v0 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB0_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_add_f32_e32 v1, -4.0, v2 +; GFX942-NEXT: ds_cmpst_rtn_b32 v1, v0, v2, v1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB0_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fsub_ret_f32: ; GFX11: ; %bb.0: @@ -255,26 +255,26 @@ define float @local_atomic_fsub_ret_f32__offset(ptr addrspace(3) %ptr) nounwind ; GFX12-NEXT: v_mov_b32_e32 v0, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fsub_ret_f32__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v1, v0 offset:65532 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB1_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_add_f32_e32 v1, -4.0, v2 -; GFX940-NEXT: ds_cmpst_rtn_b32 v1, v0, v2, v1 offset:65532 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB1_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fsub_ret_f32__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v1, v0 offset:65532 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB1_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_add_f32_e32 v1, -4.0, v2 +; GFX942-NEXT: ds_cmpst_rtn_b32 v1, v0, v2, v1 offset:65532 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB1_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fsub_ret_f32__offset: ; GFX11: ; %bb.0: @@ -465,25 +465,25 @@ define void @local_atomic_fsub_noret_f32(ptr addrspace(3) %ptr) nounwind { ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fsub_noret_f32: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v1, v0 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB2_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_add_f32_e32 v2, -4.0, v1 -; GFX940-NEXT: ds_cmpst_rtn_b32 v2, v0, v1, v2 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB2_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fsub_noret_f32: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v1, v0 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB2_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_add_f32_e32 v2, -4.0, v1 +; GFX942-NEXT: ds_cmpst_rtn_b32 v2, v0, v1, v2 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB2_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fsub_noret_f32: ; GFX11: ; %bb.0: @@ -665,25 +665,25 @@ define void @local_atomic_fsub_noret_f32__offset(ptr addrspace(3) %ptr) nounwind ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fsub_noret_f32__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v1, v0 offset:65532 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB3_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_add_f32_e32 v2, -4.0, v1 -; GFX940-NEXT: ds_cmpst_rtn_b32 v2, v0, v1, v2 offset:65532 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB3_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fsub_noret_f32__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v1, v0 offset:65532 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB3_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_add_f32_e32 v2, -4.0, v1 +; GFX942-NEXT: ds_cmpst_rtn_b32 v2, v0, v1, v2 offset:65532 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB3_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fsub_noret_f32__offset: ; GFX11: ; %bb.0: @@ -873,26 +873,26 @@ define double @local_atomic_fsub_ret_f64(ptr addrspace(3) %ptr) nounwind { ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fsub_ret_f64: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: ds_read_b64 v[0:1], v0 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB4_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[0:1] -; GFX940-NEXT: v_add_f64 v[0:1], v[4:5], -4.0 -; GFX940-NEXT: ds_cmpst_rtn_b64 v[0:1], v2, v[4:5], v[0:1] -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[4:5] -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB4_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fsub_ret_f64: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: ds_read_b64 v[0:1], v0 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB4_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] +; GFX942-NEXT: v_add_f64 v[0:1], v[4:5], -4.0 +; GFX942-NEXT: ds_cmpst_rtn_b64 v[0:1], v2, v[4:5], v[0:1] +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[4:5] +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB4_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fsub_ret_f64: ; GFX11: ; %bb.0: @@ -1089,26 +1089,26 @@ define double @local_atomic_fsub_ret_f64__offset(ptr addrspace(3) %ptr) nounwind ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fsub_ret_f64__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: ds_read_b64 v[0:1], v0 offset:65528 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB5_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[0:1] -; GFX940-NEXT: v_add_f64 v[0:1], v[4:5], -4.0 -; GFX940-NEXT: ds_cmpst_rtn_b64 v[0:1], v2, v[4:5], v[0:1] offset:65528 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[4:5] -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB5_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fsub_ret_f64__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: ds_read_b64 v[0:1], v0 offset:65528 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB5_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] +; GFX942-NEXT: v_add_f64 v[0:1], v[4:5], -4.0 +; GFX942-NEXT: ds_cmpst_rtn_b64 v[0:1], v2, v[4:5], v[0:1] offset:65528 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[4:5] +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB5_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fsub_ret_f64__offset: ; GFX11: ; %bb.0: @@ -1304,25 +1304,25 @@ define void @local_atomic_fsub_noret_f64(ptr addrspace(3) %ptr) nounwind { ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fsub_noret_f64: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b64 v[2:3], v0 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB6_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_add_f64 v[4:5], v[2:3], -4.0 -; GFX940-NEXT: ds_cmpst_rtn_b64 v[4:5], v0, v[2:3], v[4:5] -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[2:3] -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b64_e32 v[2:3], v[4:5] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB6_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fsub_noret_f64: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b64 v[2:3], v0 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB6_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_add_f64 v[4:5], v[2:3], -4.0 +; GFX942-NEXT: ds_cmpst_rtn_b64 v[4:5], v0, v[2:3], v[4:5] +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[2:3] +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB6_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fsub_noret_f64: ; GFX11: ; %bb.0: @@ -1509,25 +1509,25 @@ define void @local_atomic_fsub_noret_f64__offset(ptr addrspace(3) %ptr) nounwind ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fsub_noret_f64__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b64 v[2:3], v0 offset:65528 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB7_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_add_f64 v[4:5], v[2:3], -4.0 -; GFX940-NEXT: ds_cmpst_rtn_b64 v[4:5], v0, v[2:3], v[4:5] offset:65528 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[2:3] -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b64_e32 v[2:3], v[4:5] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB7_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fsub_noret_f64__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b64 v[2:3], v0 offset:65528 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB7_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_add_f64 v[4:5], v[2:3], -4.0 +; GFX942-NEXT: ds_cmpst_rtn_b64 v[4:5], v0, v[2:3], v[4:5] offset:65528 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[2:3] +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB7_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fsub_noret_f64__offset: ; GFX11: ; %bb.0: @@ -1734,35 +1734,35 @@ define half @local_atomic_fsub_ret_f16(ptr addrspace(3) %ptr) nounwind { ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fsub_ret_f16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_and_b32_e32 v1, -4, v0 -; GFX940-NEXT: ds_read_b32 v2, v1 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 3, v0 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_and_b32_e32 v0, 24, v3 -; GFX940-NEXT: v_lshlrev_b32_e64 v3, v3, s0 -; GFX940-NEXT: v_not_b32_e32 v3, v3 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB8_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_lshrrev_b32_e32 v2, v0, v4 -; GFX940-NEXT: v_add_f16_e32 v2, -4.0, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, v0, v2 -; GFX940-NEXT: v_and_or_b32 v2, v4, v3, v2 -; GFX940-NEXT: ds_cmpst_rtn_b32 v2, v1, v4, v2 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB8_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v0, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fsub_ret_f16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_and_b32_e32 v1, -4, v0 +; GFX942-NEXT: ds_read_b32 v2, v1 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 3, v0 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_and_b32_e32 v0, 24, v3 +; GFX942-NEXT: v_lshlrev_b32_e64 v3, v3, s0 +; GFX942-NEXT: v_not_b32_e32 v3, v3 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB8_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_lshrrev_b32_e32 v2, v0, v4 +; GFX942-NEXT: v_add_f16_e32 v2, -4.0, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, v0, v2 +; GFX942-NEXT: v_and_or_b32 v2, v4, v3, v2 +; GFX942-NEXT: ds_cmpst_rtn_b32 v2, v1, v4, v2 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB8_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v0, v2 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fsub_ret_f16: ; GFX11: ; %bb.0: @@ -2040,36 +2040,36 @@ define half @local_atomic_fsub_ret_f16__offset(ptr addrspace(3) %ptr) nounwind { ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v1, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fsub_ret_f16__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_u32_e32 v0, 0xfffe, v0 -; GFX940-NEXT: v_and_b32_e32 v1, -4, v0 -; GFX940-NEXT: ds_read_b32 v2, v1 -; GFX940-NEXT: v_and_b32_e32 v0, 3, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v3, v0, s0 -; GFX940-NEXT: v_not_b32_e32 v3, v3 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB9_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_lshrrev_b32_e32 v2, v0, v4 -; GFX940-NEXT: v_add_f16_e32 v2, -4.0, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, v0, v2 -; GFX940-NEXT: v_and_or_b32 v2, v4, v3, v2 -; GFX940-NEXT: ds_cmpst_rtn_b32 v2, v1, v4, v2 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB9_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v0, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fsub_ret_f16__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_u32_e32 v0, 0xfffe, v0 +; GFX942-NEXT: v_and_b32_e32 v1, -4, v0 +; GFX942-NEXT: ds_read_b32 v2, v1 +; GFX942-NEXT: v_and_b32_e32 v0, 3, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v3, v0, s0 +; GFX942-NEXT: v_not_b32_e32 v3, v3 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB9_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_lshrrev_b32_e32 v2, v0, v4 +; GFX942-NEXT: v_add_f16_e32 v2, -4.0, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, v0, v2 +; GFX942-NEXT: v_and_or_b32 v2, v4, v3, v2 +; GFX942-NEXT: ds_cmpst_rtn_b32 v2, v1, v4, v2 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB9_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v0, v2 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fsub_ret_f16__offset: ; GFX11: ; %bb.0: @@ -2353,34 +2353,34 @@ define void @local_atomic_fsub_noret_f16(ptr addrspace(3) %ptr) nounwind { ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fsub_noret_f16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_and_b32_e32 v1, -4, v0 -; GFX940-NEXT: ds_read_b32 v3, v1 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 3, v0 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_and_b32_e32 v0, 24, v2 -; GFX940-NEXT: v_lshlrev_b32_e64 v2, v2, s0 -; GFX940-NEXT: v_not_b32_e32 v2, v2 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB10_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v0, v3 -; GFX940-NEXT: v_add_f16_e32 v4, -4.0, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v0, v4 -; GFX940-NEXT: v_and_or_b32 v4, v3, v2, v4 -; GFX940-NEXT: ds_cmpst_rtn_b32 v4, v1, v3, v4 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB10_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fsub_noret_f16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_and_b32_e32 v1, -4, v0 +; GFX942-NEXT: ds_read_b32 v3, v1 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 3, v0 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_and_b32_e32 v0, 24, v2 +; GFX942-NEXT: v_lshlrev_b32_e64 v2, v2, s0 +; GFX942-NEXT: v_not_b32_e32 v2, v2 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB10_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v0, v3 +; GFX942-NEXT: v_add_f16_e32 v4, -4.0, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v0, v4 +; GFX942-NEXT: v_and_or_b32 v4, v3, v2, v4 +; GFX942-NEXT: ds_cmpst_rtn_b32 v4, v1, v3, v4 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB10_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fsub_noret_f16: ; GFX11: ; %bb.0: @@ -2647,35 +2647,35 @@ define void @local_atomic_fsub_noret_f16__offset(ptr addrspace(3) %ptr) nounwind ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fsub_noret_f16__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_u32_e32 v1, 0xfffe, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v1 -; GFX940-NEXT: ds_read_b32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v1, 3, v1 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 3, v1 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v2, v1, s0 -; GFX940-NEXT: v_not_b32_e32 v2, v2 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB11_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v4, v1, v3 -; GFX940-NEXT: v_add_f16_e32 v4, -4.0, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, v1, v4 -; GFX940-NEXT: v_and_or_b32 v4, v3, v2, v4 -; GFX940-NEXT: ds_cmpst_rtn_b32 v4, v0, v3, v4 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB11_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fsub_noret_f16__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_u32_e32 v1, 0xfffe, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v1 +; GFX942-NEXT: ds_read_b32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v1, 3, v1 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 3, v1 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v2, v1, s0 +; GFX942-NEXT: v_not_b32_e32 v2, v2 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB11_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_e32 v4, v1, v3 +; GFX942-NEXT: v_add_f16_e32 v4, -4.0, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, v1, v4 +; GFX942-NEXT: v_and_or_b32 v4, v3, v2, v4 +; GFX942-NEXT: ds_cmpst_rtn_b32 v4, v0, v3, v4 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB11_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fsub_noret_f16__offset: ; GFX11: ; %bb.0: @@ -2941,28 +2941,28 @@ define half @local_atomic_fsub_ret_f16__offset__align4(ptr addrspace(3) %ptr) no ; GFX12-NEXT: v_mov_b32_e32 v0, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fsub_ret_f16__offset__align4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v1, v0 offset:65534 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_mov_b32 s2, 0xffff0000 -; GFX940-NEXT: .LBB12_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_add_f16_e32 v1, -4.0, v2 -; GFX940-NEXT: v_and_or_b32 v1, v2, s2, v1 -; GFX940-NEXT: ds_cmpst_rtn_b32 v1, v0, v2, v1 offset:65534 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB12_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fsub_ret_f16__offset__align4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v1, v0 offset:65534 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_mov_b32 s2, 0xffff0000 +; GFX942-NEXT: .LBB12_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_add_f16_e32 v1, -4.0, v2 +; GFX942-NEXT: v_and_or_b32 v1, v2, s2, v1 +; GFX942-NEXT: ds_cmpst_rtn_b32 v1, v0, v2, v1 offset:65534 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB12_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fsub_ret_f16__offset__align4: ; GFX11: ; %bb.0: @@ -3176,27 +3176,27 @@ define void @local_atomic_fsub_noret_f16__offset__align4(ptr addrspace(3) %ptr) ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fsub_noret_f16__offset__align4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v1, v0 offset:65534 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_mov_b32 s2, 0xffff0000 -; GFX940-NEXT: .LBB13_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_add_f16_e32 v2, -4.0, v1 -; GFX940-NEXT: v_and_or_b32 v2, v1, s2, v2 -; GFX940-NEXT: ds_cmpst_rtn_b32 v2, v0, v1, v2 offset:65534 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB13_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fsub_noret_f16__offset__align4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v1, v0 offset:65534 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_mov_b32 s2, 0xffff0000 +; GFX942-NEXT: .LBB13_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_add_f16_e32 v2, -4.0, v1 +; GFX942-NEXT: v_and_or_b32 v2, v1, s2, v2 +; GFX942-NEXT: ds_cmpst_rtn_b32 v2, v0, v1, v2 offset:65534 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB13_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fsub_noret_f16__offset__align4: ; GFX11: ; %bb.0: @@ -3426,43 +3426,43 @@ define bfloat @local_atomic_fsub_ret_bf16(ptr addrspace(3) %ptr) nounwind { ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fsub_ret_bf16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_and_b32_e32 v1, -4, v0 -; GFX940-NEXT: ds_read_b32 v3, v1 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 3, v0 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_and_b32_e32 v0, 24, v2 -; GFX940-NEXT: v_lshlrev_b32_e64 v2, v2, s0 -; GFX940-NEXT: v_not_b32_e32 v2, v2 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB14_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_lshrrev_b32_sdwa v3, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_add_f32_e32 v3, -4.0, v3 -; GFX940-NEXT: v_bfe_u32 v5, v3, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v3 -; GFX940-NEXT: v_add3_u32 v5, v5, v3, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v3, v4, v2, v3 -; GFX940-NEXT: ds_cmpst_rtn_b32 v3, v1, v4, v3 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB14_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fsub_ret_bf16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_and_b32_e32 v1, -4, v0 +; GFX942-NEXT: ds_read_b32 v3, v1 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 3, v0 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_and_b32_e32 v0, 24, v2 +; GFX942-NEXT: v_lshlrev_b32_e64 v2, v2, s0 +; GFX942-NEXT: v_not_b32_e32 v2, v2 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB14_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_lshrrev_b32_sdwa v3, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_add_f32_e32 v3, -4.0, v3 +; GFX942-NEXT: v_bfe_u32 v5, v3, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v3 +; GFX942-NEXT: v_add3_u32 v5, v5, v3, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v3, v4, v2, v3 +; GFX942-NEXT: ds_cmpst_rtn_b32 v3, v1, v4, v3 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB14_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fsub_ret_bf16: ; GFX11: ; %bb.0: @@ -3781,44 +3781,44 @@ define bfloat @local_atomic_fsub_ret_bf16__offset(ptr addrspace(3) %ptr) nounwin ; GFX12-NEXT: v_lshrrev_b32_e32 v0, v1, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fsub_ret_bf16__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_u32_e32 v0, 0xfffe, v0 -; GFX940-NEXT: v_and_b32_e32 v1, -4, v0 -; GFX940-NEXT: ds_read_b32 v3, v1 -; GFX940-NEXT: v_and_b32_e32 v0, 3, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v2, v0, s0 -; GFX940-NEXT: v_not_b32_e32 v2, v2 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB15_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_lshrrev_b32_sdwa v3, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_add_f32_e32 v3, -4.0, v3 -; GFX940-NEXT: v_bfe_u32 v5, v3, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v3 -; GFX940-NEXT: v_add3_u32 v5, v5, v3, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v3, v4, v2, v3 -; GFX940-NEXT: ds_cmpst_rtn_b32 v3, v1, v4, v3 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB15_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, v0, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fsub_ret_bf16__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_u32_e32 v0, 0xfffe, v0 +; GFX942-NEXT: v_and_b32_e32 v1, -4, v0 +; GFX942-NEXT: ds_read_b32 v3, v1 +; GFX942-NEXT: v_and_b32_e32 v0, 3, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v2, v0, s0 +; GFX942-NEXT: v_not_b32_e32 v2, v2 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB15_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_lshrrev_b32_sdwa v3, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_add_f32_e32 v3, -4.0, v3 +; GFX942-NEXT: v_bfe_u32 v5, v3, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v3 +; GFX942-NEXT: v_add3_u32 v5, v5, v3, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v3, v4, v2, v3 +; GFX942-NEXT: ds_cmpst_rtn_b32 v3, v1, v4, v3 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB15_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, v0, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fsub_ret_bf16__offset: ; GFX11: ; %bb.0: @@ -4143,42 +4143,42 @@ define void @local_atomic_fsub_noret_bf16(ptr addrspace(3) %ptr) nounwind { ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fsub_noret_bf16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_and_b32_e32 v1, -4, v0 -; GFX940-NEXT: ds_read_b32 v3, v1 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 3, v0 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_and_b32_e32 v0, 24, v2 -; GFX940-NEXT: v_lshlrev_b32_e64 v2, v2, s0 -; GFX940-NEXT: v_not_b32_e32 v2, v2 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB16_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v4, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_add_f32_e32 v4, -4.0, v4 -; GFX940-NEXT: v_bfe_u32 v5, v4, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v4 -; GFX940-NEXT: v_add3_u32 v5, v5, v4, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v4, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v4, v3, v2, v4 -; GFX940-NEXT: ds_cmpst_rtn_b32 v4, v1, v3, v4 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB16_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fsub_noret_bf16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_and_b32_e32 v1, -4, v0 +; GFX942-NEXT: ds_read_b32 v3, v1 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 3, v0 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_and_b32_e32 v0, 24, v2 +; GFX942-NEXT: v_lshlrev_b32_e64 v2, v2, s0 +; GFX942-NEXT: v_not_b32_e32 v2, v2 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB16_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v4, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_add_f32_e32 v4, -4.0, v4 +; GFX942-NEXT: v_bfe_u32 v5, v4, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v4 +; GFX942-NEXT: v_add3_u32 v5, v5, v4, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v4, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v4, v3, v2, v4 +; GFX942-NEXT: ds_cmpst_rtn_b32 v4, v1, v3, v4 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB16_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fsub_noret_bf16: ; GFX11: ; %bb.0: @@ -4486,43 +4486,43 @@ define void @local_atomic_fsub_noret_bf16__offset(ptr addrspace(3) %ptr) nounwin ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fsub_noret_bf16__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_add_u32_e32 v1, 0xfffe, v0 -; GFX940-NEXT: v_and_b32_e32 v0, -4, v1 -; GFX940-NEXT: ds_read_b32 v3, v0 -; GFX940-NEXT: v_and_b32_e32 v1, 3, v1 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 3, v1 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: v_lshlrev_b32_e64 v2, v1, s0 -; GFX940-NEXT: v_not_b32_e32 v2, v2 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: .LBB17_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_add_f32_e32 v4, -4.0, v4 -; GFX940-NEXT: v_bfe_u32 v5, v4, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v4 -; GFX940-NEXT: v_add3_u32 v5, v5, v4, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc -; GFX940-NEXT: v_lshlrev_b32_sdwa v4, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX940-NEXT: v_and_or_b32 v4, v3, v2, v4 -; GFX940-NEXT: ds_cmpst_rtn_b32 v4, v0, v3, v4 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB17_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fsub_noret_bf16__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_add_u32_e32 v1, 0xfffe, v0 +; GFX942-NEXT: v_and_b32_e32 v0, -4, v1 +; GFX942-NEXT: ds_read_b32 v3, v0 +; GFX942-NEXT: v_and_b32_e32 v1, 3, v1 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 3, v1 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: v_lshlrev_b32_e64 v2, v1, s0 +; GFX942-NEXT: v_not_b32_e32 v2, v2 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: .LBB17_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_lshrrev_b32_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_add_f32_e32 v4, -4.0, v4 +; GFX942-NEXT: v_bfe_u32 v5, v4, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v6, 0x400000, v4 +; GFX942-NEXT: v_add3_u32 v5, v5, v4, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc +; GFX942-NEXT: v_lshlrev_b32_sdwa v4, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX942-NEXT: v_and_or_b32 v4, v3, v2, v4 +; GFX942-NEXT: ds_cmpst_rtn_b32 v4, v0, v3, v4 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB17_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fsub_noret_bf16__offset: ; GFX11: ; %bb.0: @@ -4829,37 +4829,37 @@ define bfloat @local_atomic_fsub_ret_bf16__offset__align4(ptr addrspace(3) %ptr) ; GFX12-NEXT: v_mov_b32_e32 v0, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fsub_ret_bf16__offset__align4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v1, v0 offset:65534 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: s_mov_b32 s3, 0xffff0000 -; GFX940-NEXT: .LBB18_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v2 -; GFX940-NEXT: v_add_f32_e32 v1, -4.0, v1 -; GFX940-NEXT: v_bfe_u32 v3, v1, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v4, 0x400000, v1 -; GFX940-NEXT: v_add3_u32 v3, v3, v1, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v1, v1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: v_and_or_b32 v1, v2, s3, v1 -; GFX940-NEXT: ds_cmpst_rtn_b32 v1, v0, v2, v1 offset:65534 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB18_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fsub_ret_bf16__offset__align4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v1, v0 offset:65534 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: s_mov_b32 s3, 0xffff0000 +; GFX942-NEXT: .LBB18_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v2 +; GFX942-NEXT: v_add_f32_e32 v1, -4.0, v1 +; GFX942-NEXT: v_bfe_u32 v3, v1, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v4, 0x400000, v1 +; GFX942-NEXT: v_add3_u32 v3, v3, v1, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v1, v1 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: v_and_or_b32 v1, v2, s3, v1 +; GFX942-NEXT: ds_cmpst_rtn_b32 v1, v0, v2, v1 offset:65534 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB18_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fsub_ret_bf16__offset__align4: ; GFX11: ; %bb.0: @@ -5120,36 +5120,36 @@ define void @local_atomic_fsub_noret_bf16__offset__align4(ptr addrspace(3) %ptr) ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fsub_noret_bf16__offset__align4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v1, v0 offset:65534 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: s_mov_b32 s3, 0xffff0000 -; GFX940-NEXT: .LBB19_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 -; GFX940-NEXT: v_add_f32_e32 v2, -4.0, v2 -; GFX940-NEXT: v_bfe_u32 v3, v2, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v4, 0x400000, v2 -; GFX940-NEXT: v_add3_u32 v3, v3, v2, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_and_or_b32 v2, v1, s3, v2 -; GFX940-NEXT: ds_cmpst_rtn_b32 v2, v0, v1, v2 offset:65534 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB19_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fsub_noret_bf16__offset__align4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v1, v0 offset:65534 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: s_mov_b32 s3, 0xffff0000 +; GFX942-NEXT: .LBB19_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1 +; GFX942-NEXT: v_add_f32_e32 v2, -4.0, v2 +; GFX942-NEXT: v_bfe_u32 v3, v2, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v4, 0x400000, v2 +; GFX942-NEXT: v_add3_u32 v3, v3, v2, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_and_or_b32 v2, v1, s3, v2 +; GFX942-NEXT: ds_cmpst_rtn_b32 v2, v0, v1, v2 offset:65534 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB19_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fsub_noret_bf16__offset__align4: ; GFX11: ; %bb.0: @@ -5396,26 +5396,26 @@ define <2 x half> @local_atomic_fsub_ret_v2f16(ptr addrspace(3) %ptr, <2 x half> ; GFX12-NEXT: v_mov_b32_e32 v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fsub_ret_v2f16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v2, v0 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB20_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_pk_add_f16 v2, v3, v1 neg_lo:[0,1] neg_hi:[0,1] -; GFX940-NEXT: ds_cmpst_rtn_b32 v2, v0, v3, v2 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB20_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fsub_ret_v2f16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v2, v0 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB20_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_pk_add_f16 v2, v3, v1 neg_lo:[0,1] neg_hi:[0,1] +; GFX942-NEXT: ds_cmpst_rtn_b32 v2, v0, v3, v2 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB20_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fsub_ret_v2f16: ; GFX11: ; %bb.0: @@ -5652,26 +5652,26 @@ define <2 x half> @local_atomic_fsub_ret_v2f16__offset(ptr addrspace(3) %ptr, <2 ; GFX12-NEXT: v_mov_b32_e32 v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fsub_ret_v2f16__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v2, v0 offset:65532 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB21_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_pk_add_f16 v2, v3, v1 neg_lo:[0,1] neg_hi:[0,1] -; GFX940-NEXT: ds_cmpst_rtn_b32 v2, v0, v3, v2 offset:65532 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB21_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fsub_ret_v2f16__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v2, v0 offset:65532 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB21_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_pk_add_f16 v2, v3, v1 neg_lo:[0,1] neg_hi:[0,1] +; GFX942-NEXT: ds_cmpst_rtn_b32 v2, v0, v3, v2 offset:65532 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB21_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fsub_ret_v2f16__offset: ; GFX11: ; %bb.0: @@ -5906,25 +5906,25 @@ define void @local_atomic_fsub_noret_v2f16(ptr addrspace(3) %ptr, <2 x half> %va ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fsub_noret_v2f16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v2, v0 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB22_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_pk_add_f16 v3, v2, v1 neg_lo:[0,1] neg_hi:[0,1] -; GFX940-NEXT: ds_cmpst_rtn_b32 v3, v0, v2, v3 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB22_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fsub_noret_v2f16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v2, v0 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB22_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_pk_add_f16 v3, v2, v1 neg_lo:[0,1] neg_hi:[0,1] +; GFX942-NEXT: ds_cmpst_rtn_b32 v3, v0, v2, v3 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB22_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fsub_noret_v2f16: ; GFX11: ; %bb.0: @@ -6150,25 +6150,25 @@ define void @local_atomic_fsub_noret_v2f16__offset(ptr addrspace(3) %ptr, <2 x h ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fsub_noret_v2f16__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v2, v0 offset:65532 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB23_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_pk_add_f16 v3, v2, v1 neg_lo:[0,1] neg_hi:[0,1] -; GFX940-NEXT: ds_cmpst_rtn_b32 v3, v0, v2, v3 offset:65532 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB23_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fsub_noret_v2f16__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v2, v0 offset:65532 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB23_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_pk_add_f16 v3, v2, v1 neg_lo:[0,1] neg_hi:[0,1] +; GFX942-NEXT: ds_cmpst_rtn_b32 v3, v0, v2, v3 offset:65532 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB23_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fsub_noret_v2f16__offset: ; GFX11: ; %bb.0: @@ -6423,45 +6423,45 @@ define <2 x bfloat> @local_atomic_fsub_ret_v2bf16(ptr addrspace(3) %ptr, <2 x bf ; GFX12-NEXT: v_mov_b32_e32 v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fsub_ret_v2bf16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v2, v0 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v1 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB24_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v4 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v4 -; GFX940-NEXT: v_sub_f32_e32 v2, v2, v3 -; GFX940-NEXT: v_sub_f32_e32 v5, v5, v1 -; GFX940-NEXT: v_bfe_u32 v6, v2, 16, 1 -; GFX940-NEXT: v_bfe_u32 v8, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v7, 0x400000, v2 -; GFX940-NEXT: v_or_b32_e32 v9, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v2, s4 -; GFX940-NEXT: v_add3_u32 v8, v8, v5, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v2, v6, v7, s[0:1] -; GFX940-NEXT: v_perm_b32 v2, v5, v2, s5 -; GFX940-NEXT: ds_cmpst_rtn_b32 v2, v0, v4, v2 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB24_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fsub_ret_v2bf16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v2, v0 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v1 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB24_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v4 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v4 +; GFX942-NEXT: v_sub_f32_e32 v2, v2, v3 +; GFX942-NEXT: v_sub_f32_e32 v5, v5, v1 +; GFX942-NEXT: v_bfe_u32 v6, v2, 16, 1 +; GFX942-NEXT: v_bfe_u32 v8, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v7, 0x400000, v2 +; GFX942-NEXT: v_or_b32_e32 v9, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v2, s4 +; GFX942-NEXT: v_add3_u32 v8, v8, v5, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v2, v6, v7, s[0:1] +; GFX942-NEXT: v_perm_b32 v2, v5, v2, s5 +; GFX942-NEXT: ds_cmpst_rtn_b32 v2, v0, v4, v2 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB24_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fsub_ret_v2bf16: ; GFX11: ; %bb.0: @@ -6801,45 +6801,45 @@ define <2 x bfloat> @local_atomic_fsub_ret_v2bf16__offset(ptr addrspace(3) %ptr, ; GFX12-NEXT: v_mov_b32_e32 v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fsub_ret_v2bf16__offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v2, v0 offset:65532 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v1 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB25_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v4 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v4 -; GFX940-NEXT: v_sub_f32_e32 v2, v2, v3 -; GFX940-NEXT: v_sub_f32_e32 v5, v5, v1 -; GFX940-NEXT: v_bfe_u32 v6, v2, 16, 1 -; GFX940-NEXT: v_bfe_u32 v8, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v7, 0x400000, v2 -; GFX940-NEXT: v_or_b32_e32 v9, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v2, s4 -; GFX940-NEXT: v_add3_u32 v8, v8, v5, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v2, v6, v7, s[0:1] -; GFX940-NEXT: v_perm_b32 v2, v5, v2, s5 -; GFX940-NEXT: ds_cmpst_rtn_b32 v2, v0, v4, v2 offset:65532 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB25_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fsub_ret_v2bf16__offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v2, v0 offset:65532 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v1 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB25_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v4 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v4 +; GFX942-NEXT: v_sub_f32_e32 v2, v2, v3 +; GFX942-NEXT: v_sub_f32_e32 v5, v5, v1 +; GFX942-NEXT: v_bfe_u32 v6, v2, 16, 1 +; GFX942-NEXT: v_bfe_u32 v8, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v7, 0x400000, v2 +; GFX942-NEXT: v_or_b32_e32 v9, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v2, s4 +; GFX942-NEXT: v_add3_u32 v8, v8, v5, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v2, v2 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v2, v6, v7, s[0:1] +; GFX942-NEXT: v_perm_b32 v2, v5, v2, s5 +; GFX942-NEXT: ds_cmpst_rtn_b32 v2, v0, v4, v2 offset:65532 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB25_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fsub_ret_v2bf16__offset: ; GFX11: ; %bb.0: @@ -7178,44 +7178,44 @@ define void @local_atomic_fsub_noret_v2bf16(ptr addrspace(3) %ptr, <2 x bfloat> ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fsub_noret_v2bf16: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v3, v0 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB26_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v3 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v3 -; GFX940-NEXT: v_sub_f32_e32 v4, v4, v2 -; GFX940-NEXT: v_sub_f32_e32 v5, v5, v1 -; GFX940-NEXT: v_bfe_u32 v6, v4, 16, 1 -; GFX940-NEXT: v_bfe_u32 v8, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v7, 0x400000, v4 -; GFX940-NEXT: v_or_b32_e32 v9, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v4, s4 -; GFX940-NEXT: v_add3_u32 v8, v8, v5, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v4, v4 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v4, v6, v7, s[0:1] -; GFX940-NEXT: v_perm_b32 v4, v5, v4, s5 -; GFX940-NEXT: ds_cmpst_rtn_b32 v4, v0, v3, v4 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB26_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fsub_noret_v2bf16: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v3, v0 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB26_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v3 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v3 +; GFX942-NEXT: v_sub_f32_e32 v4, v4, v2 +; GFX942-NEXT: v_sub_f32_e32 v5, v5, v1 +; GFX942-NEXT: v_bfe_u32 v6, v4, 16, 1 +; GFX942-NEXT: v_bfe_u32 v8, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v7, 0x400000, v4 +; GFX942-NEXT: v_or_b32_e32 v9, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v4, s4 +; GFX942-NEXT: v_add3_u32 v8, v8, v5, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v4, v4 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v4, v6, v7, s[0:1] +; GFX942-NEXT: v_perm_b32 v4, v5, v4, s5 +; GFX942-NEXT: ds_cmpst_rtn_b32 v4, v0, v3, v4 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB26_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fsub_noret_v2bf16: ; GFX11: ; %bb.0: @@ -7543,44 +7543,44 @@ define void @local_atomic_fsub_noret_v2bf16__ofset(ptr addrspace(3) %ptr, <2 x b ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fsub_noret_v2bf16__ofset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v3, v0 offset:65532 -; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 -; GFX940-NEXT: s_movk_i32 s4, 0x7fff -; GFX940-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 -; GFX940-NEXT: s_mov_b32 s5, 0x7060302 -; GFX940-NEXT: .LBB27_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v3 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v3 -; GFX940-NEXT: v_sub_f32_e32 v4, v4, v2 -; GFX940-NEXT: v_sub_f32_e32 v5, v5, v1 -; GFX940-NEXT: v_bfe_u32 v6, v4, 16, 1 -; GFX940-NEXT: v_bfe_u32 v8, v5, 16, 1 -; GFX940-NEXT: v_or_b32_e32 v7, 0x400000, v4 -; GFX940-NEXT: v_or_b32_e32 v9, 0x400000, v5 -; GFX940-NEXT: v_add3_u32 v6, v6, v4, s4 -; GFX940-NEXT: v_add3_u32 v8, v8, v5, s4 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 -; GFX940-NEXT: v_cmp_u_f32_e64 s[0:1], v4, v4 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc -; GFX940-NEXT: v_cndmask_b32_e64 v4, v6, v7, s[0:1] -; GFX940-NEXT: v_perm_b32 v4, v5, v4, s5 -; GFX940-NEXT: ds_cmpst_rtn_b32 v4, v0, v3, v4 offset:65532 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 -; GFX940-NEXT: s_or_b64 s[2:3], vcc, s[2:3] -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_cbranch_execnz .LBB27_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fsub_noret_v2bf16__ofset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v3, v0 offset:65532 +; GFX942-NEXT: s_mov_b64 s[2:3], 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1 +; GFX942-NEXT: s_movk_i32 s4, 0x7fff +; GFX942-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 +; GFX942-NEXT: s_mov_b32 s5, 0x7060302 +; GFX942-NEXT: .LBB27_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v3 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v3 +; GFX942-NEXT: v_sub_f32_e32 v4, v4, v2 +; GFX942-NEXT: v_sub_f32_e32 v5, v5, v1 +; GFX942-NEXT: v_bfe_u32 v6, v4, 16, 1 +; GFX942-NEXT: v_bfe_u32 v8, v5, 16, 1 +; GFX942-NEXT: v_or_b32_e32 v7, 0x400000, v4 +; GFX942-NEXT: v_or_b32_e32 v9, 0x400000, v5 +; GFX942-NEXT: v_add3_u32 v6, v6, v4, s4 +; GFX942-NEXT: v_add3_u32 v8, v8, v5, s4 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX942-NEXT: v_cmp_u_f32_e64 s[0:1], v4, v4 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v5, v8, v9, vcc +; GFX942-NEXT: v_cndmask_b32_e64 v4, v6, v7, s[0:1] +; GFX942-NEXT: v_perm_b32 v4, v5, v4, s5 +; GFX942-NEXT: ds_cmpst_rtn_b32 v4, v0, v3, v4 offset:65532 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3 +; GFX942-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_cbranch_execnz .LBB27_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fsub_noret_v2bf16__ofset: ; GFX11: ; %bb.0: @@ -7895,26 +7895,26 @@ define float @local_atomic_fsub_ret_f32__amdgpu_ignore_denormal_mode(ptr addrspa ; GFX12-NEXT: v_mov_b32_e32 v0, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fsub_ret_f32__amdgpu_ignore_denormal_mode: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v1, v0 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB28_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_add_f32_e32 v1, -4.0, v2 -; GFX940-NEXT: ds_cmpst_rtn_b32 v1, v0, v2, v1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB28_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fsub_ret_f32__amdgpu_ignore_denormal_mode: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v1, v0 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB28_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_add_f32_e32 v1, -4.0, v2 +; GFX942-NEXT: ds_cmpst_rtn_b32 v1, v0, v2, v1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB28_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fsub_ret_f32__amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: @@ -8104,25 +8104,25 @@ define void @local_atomic_fsub_noret_f32__amdgpu_ignore_denormal_mode(ptr addrsp ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: local_atomic_fsub_noret_f32__amdgpu_ignore_denormal_mode: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ds_read_b32 v1, v0 -; GFX940-NEXT: s_mov_b64 s[0:1], 0 -; GFX940-NEXT: .LBB29_1: ; %atomicrmw.start -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_add_f32_e32 v2, -4.0, v1 -; GFX940-NEXT: ds_cmpst_rtn_b32 v2, v0, v1, v2 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 -; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_cbranch_execnz .LBB29_1 -; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: local_atomic_fsub_noret_f32__amdgpu_ignore_denormal_mode: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ds_read_b32 v1, v0 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: .LBB29_1: ; %atomicrmw.start +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_add_f32_e32 v2, -4.0, v1 +; GFX942-NEXT: ds_cmpst_rtn_b32 v2, v0, v1, v2 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1 +; GFX942-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_cbranch_execnz .LBB29_1 +; GFX942-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX942-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: local_atomic_fsub_noret_f32__amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx8.mir b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx8.mir index 930d17646c797..6758db41506a7 100644 --- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx8.mir +++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx8.mir @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -run-pass=localstackalloc -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX803 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=localstackalloc -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX900 %s -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -run-pass=localstackalloc -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX940 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -run-pass=localstackalloc -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX942 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -mattr=+wavefrontsize64 -run-pass=localstackalloc -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+wavefrontsize64 -run-pass=localstackalloc -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX12 %s @@ -36,16 +36,16 @@ body: | ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] ; GFX900-NEXT: SI_RETURN ; - ; GFX940-LABEL: name: local_stack_alloc__v_add_co_u32_e32__literal_offsets - ; GFX940: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 256 - ; GFX940-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 %stack.0 - ; GFX940-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xexec_hi = S_ADD_I32 killed [[S_MOV_B32_]], [[S_MOV_B32_1]], implicit-def dead $scc - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[COPY]] - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] - ; GFX940-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 256, [[COPY1]], implicit-def dead $vcc, implicit $exec - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] - ; GFX940-NEXT: SI_RETURN + ; GFX942-LABEL: name: local_stack_alloc__v_add_co_u32_e32__literal_offsets + ; GFX942: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 256 + ; GFX942-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 %stack.0 + ; GFX942-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xexec_hi = S_ADD_I32 killed [[S_MOV_B32_]], [[S_MOV_B32_1]], implicit-def dead $scc + ; GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[COPY]] + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX942-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 256, [[COPY1]], implicit-def dead $vcc, implicit $exec + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] + ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__v_add_co_u32_e32__literal_offsets ; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 256 @@ -100,12 +100,12 @@ body: | ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]], implicit $vcc ; GFX900-NEXT: SI_RETURN ; - ; GFX940-LABEL: name: local_stack_alloc__v_add_co_u32_e32__literal_offsets_live_vcc - ; GFX940: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 256, %stack.0, implicit-def dead $vcc, implicit $exec - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] - ; GFX940-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 512, %stack.0, implicit-def $vcc, implicit $exec - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]], implicit $vcc - ; GFX940-NEXT: SI_RETURN + ; GFX942-LABEL: name: local_stack_alloc__v_add_co_u32_e32__literal_offsets_live_vcc + ; GFX942: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 256, %stack.0, implicit-def dead $vcc, implicit $exec + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] + ; GFX942-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 512, %stack.0, implicit-def $vcc, implicit $exec + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]], implicit $vcc + ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__v_add_co_u32_e32__literal_offsets_live_vcc ; GFX10: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 256, %stack.0, implicit-def dead $vcc, implicit $exec @@ -159,16 +159,16 @@ body: | ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] ; GFX900-NEXT: SI_RETURN ; - ; GFX940-LABEL: name: local_stack_alloc__v_add_co_u32_e32__inline_imm_offsets - ; GFX940: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 8 - ; GFX940-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 %stack.0 - ; GFX940-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xexec_hi = S_ADD_I32 killed [[S_MOV_B32_]], [[S_MOV_B32_1]], implicit-def dead $scc - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[COPY]] - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] - ; GFX940-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 8, [[COPY1]], implicit-def dead $vcc, implicit $exec - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] - ; GFX940-NEXT: SI_RETURN + ; GFX942-LABEL: name: local_stack_alloc__v_add_co_u32_e32__inline_imm_offsets + ; GFX942: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 8 + ; GFX942-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 %stack.0 + ; GFX942-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xexec_hi = S_ADD_I32 killed [[S_MOV_B32_]], [[S_MOV_B32_1]], implicit-def dead $scc + ; GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[COPY]] + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX942-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 8, [[COPY1]], implicit-def dead $vcc, implicit $exec + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] + ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__v_add_co_u32_e32__inline_imm_offsets ; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 8 @@ -229,15 +229,15 @@ body: | ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] ; GFX900-NEXT: SI_RETURN ; - ; GFX940-LABEL: name: local_stack_alloc__v_add_co_u32_e64__inline_imm_offsets - ; GFX940: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 8 - ; GFX940-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 %stack.0 - ; GFX940-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xexec_hi = S_ADD_I32 killed [[S_MOV_B32_]], [[S_MOV_B32_1]], implicit-def dead $scc - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[COPY]] - ; GFX940-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 8, [[S_ADD_I32_]], 0, implicit $exec - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] - ; GFX940-NEXT: SI_RETURN + ; GFX942-LABEL: name: local_stack_alloc__v_add_co_u32_e64__inline_imm_offsets + ; GFX942: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 8 + ; GFX942-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 %stack.0 + ; GFX942-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xexec_hi = S_ADD_I32 killed [[S_MOV_B32_]], [[S_MOV_B32_1]], implicit-def dead $scc + ; GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[COPY]] + ; GFX942-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 8, [[S_ADD_I32_]], 0, implicit $exec + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] + ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__v_add_co_u32_e64__inline_imm_offsets ; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 8 @@ -291,12 +291,12 @@ body: | ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] ; GFX900-NEXT: SI_RETURN implicit [[V_ADD_CO_U32_e64_1]] ; - ; GFX940-LABEL: name: local_stack_alloc__v_add_co_u32_e64__inline_imm_offsets_live_vcc - ; GFX940: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %stack.0, 8, 0, implicit $exec - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] - ; GFX940-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 16, %stack.0, 0, implicit $exec - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] - ; GFX940-NEXT: SI_RETURN implicit [[V_ADD_CO_U32_e64_1]] + ; GFX942-LABEL: name: local_stack_alloc__v_add_co_u32_e64__inline_imm_offsets_live_vcc + ; GFX942: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %stack.0, 8, 0, implicit $exec + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] + ; GFX942-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 16, %stack.0, 0, implicit $exec + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] + ; GFX942-NEXT: SI_RETURN implicit [[V_ADD_CO_U32_e64_1]] ; ; GFX10-LABEL: name: local_stack_alloc__v_add_co_u32_e64__inline_imm_offsets_live_vcc ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %stack.0, 8, 0, implicit $exec @@ -344,12 +344,12 @@ body: | ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2359305 /* reguse:SReg_32 */, [[S_ADD_I32_1]] ; GFX900-NEXT: SI_RETURN ; - ; GFX940-LABEL: name: local_stack_alloc__s_add_i32__literal_offsets - ; GFX940: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, %stack.0, implicit-def dead $scc - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2359305 /* reguse:SReg_32 */, [[S_ADD_I32_]] - ; GFX940-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 512, %stack.0, implicit-def dead $scc - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2359305 /* reguse:SReg_32 */, [[S_ADD_I32_1]] - ; GFX940-NEXT: SI_RETURN + ; GFX942-LABEL: name: local_stack_alloc__s_add_i32__literal_offsets + ; GFX942: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, %stack.0, implicit-def dead $scc + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2359305 /* reguse:SReg_32 */, [[S_ADD_I32_]] + ; GFX942-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 512, %stack.0, implicit-def dead $scc + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2359305 /* reguse:SReg_32 */, [[S_ADD_I32_1]] + ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__s_add_i32__literal_offsets ; GFX10: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, %stack.0, implicit-def dead $scc @@ -397,12 +397,12 @@ body: | ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[S_ADD_I32_1]] ; GFX900-NEXT: SI_RETURN ; - ; GFX940-LABEL: name: local_stack_alloc__s_add_i32__inline_imm_offsets - ; GFX940: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 8, %stack.0, implicit-def dead $scc - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[S_ADD_I32_]] - ; GFX940-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 16, %stack.0, implicit-def dead $scc - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[S_ADD_I32_1]] - ; GFX940-NEXT: SI_RETURN + ; GFX942-LABEL: name: local_stack_alloc__s_add_i32__inline_imm_offsets + ; GFX942: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 8, %stack.0, implicit-def dead $scc + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[S_ADD_I32_]] + ; GFX942-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 16, %stack.0, implicit-def dead $scc + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[S_ADD_I32_1]] + ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__s_add_i32__inline_imm_offsets ; GFX10: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 8, %stack.0, implicit-def dead $scc @@ -459,16 +459,16 @@ body: | ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[S_ADD_I32_1]] ; GFX900-NEXT: SI_RETURN ; - ; GFX940-LABEL: name: local_stack_alloc__s_add_i32__reg_offsets - ; GFX940: liveins: $sgpr4, $sgpr5 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX940-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], %stack.0, implicit-def dead $scc - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[S_ADD_I32_]] - ; GFX940-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], %stack.0, implicit-def dead $scc - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[S_ADD_I32_1]] - ; GFX940-NEXT: SI_RETURN + ; GFX942-LABEL: name: local_stack_alloc__s_add_i32__reg_offsets + ; GFX942: liveins: $sgpr4, $sgpr5 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX942-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], %stack.0, implicit-def dead $scc + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[S_ADD_I32_]] + ; GFX942-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], %stack.0, implicit-def dead $scc + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[S_ADD_I32_1]] + ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__s_add_i32__reg_offsets ; GFX10: liveins: $sgpr4, $sgpr5 @@ -536,16 +536,16 @@ body: | ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[S_ADD_I32_1]] ; GFX900-NEXT: SI_RETURN ; - ; GFX940-LABEL: name: local_stack_alloc__s_add_i32__reg_offsets_commute - ; GFX940: liveins: $sgpr4, $sgpr5 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX940-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, [[COPY]], implicit-def dead $scc - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[S_ADD_I32_]] - ; GFX940-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, [[COPY1]], implicit-def dead $scc - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[S_ADD_I32_1]] - ; GFX940-NEXT: SI_RETURN + ; GFX942-LABEL: name: local_stack_alloc__s_add_i32__reg_offsets_commute + ; GFX942: liveins: $sgpr4, $sgpr5 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX942-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, [[COPY]], implicit-def dead $scc + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[S_ADD_I32_]] + ; GFX942-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, [[COPY1]], implicit-def dead $scc + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[S_ADD_I32_1]] + ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__s_add_i32__reg_offsets_commute ; GFX10: liveins: $sgpr4, $sgpr5 @@ -606,13 +606,13 @@ body: | ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2359305 /* reguse:SReg_32 */, [[S_ADD_I32_1]] ; GFX900-NEXT: SI_RETURN implicit $scc ; - ; GFX940-LABEL: name: local_stack_alloc__s_add_i32__literal_offsets_live_scc - ; GFX940: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, %stack.0, implicit-def $scc - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2359305 /* reguse:SReg_32 */, [[S_ADD_I32_]] - ; GFX940-NEXT: S_NOP 0, implicit $scc - ; GFX940-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 512, %stack.0, implicit-def $scc - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2359305 /* reguse:SReg_32 */, [[S_ADD_I32_1]] - ; GFX940-NEXT: SI_RETURN implicit $scc + ; GFX942-LABEL: name: local_stack_alloc__s_add_i32__literal_offsets_live_scc + ; GFX942: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, %stack.0, implicit-def $scc + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2359305 /* reguse:SReg_32 */, [[S_ADD_I32_]] + ; GFX942-NEXT: S_NOP 0, implicit $scc + ; GFX942-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 512, %stack.0, implicit-def $scc + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2359305 /* reguse:SReg_32 */, [[S_ADD_I32_1]] + ; GFX942-NEXT: SI_RETURN implicit $scc ; ; GFX10-LABEL: name: local_stack_alloc__s_add_i32__literal_offsets_live_scc ; GFX10: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, %stack.0, implicit-def $scc @@ -672,16 +672,16 @@ body: | ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] ; GFX900-NEXT: SI_RETURN ; - ; GFX940-LABEL: name: local_stack_alloc__v_add_co_u32_e32__vgpr_offsets - ; GFX940: liveins: $vgpr0 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_MOV_B32 %stack.0 - ; GFX940-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], %vgpr_offset, implicit-def dead $vcc, implicit $exec - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] - ; GFX940-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], %vgpr_offset, implicit-def dead $vcc, implicit $exec - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] - ; GFX940-NEXT: SI_RETURN + ; GFX942-LABEL: name: local_stack_alloc__v_add_co_u32_e32__vgpr_offsets + ; GFX942: liveins: $vgpr0 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_MOV_B32 %stack.0 + ; GFX942-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], %vgpr_offset, implicit-def dead $vcc, implicit $exec + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] + ; GFX942-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], %vgpr_offset, implicit-def dead $vcc, implicit $exec + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] + ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__v_add_co_u32_e32__vgpr_offsets ; GFX10: liveins: $vgpr0 @@ -747,16 +747,16 @@ body: | ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] ; GFX900-NEXT: SI_RETURN ; - ; GFX940-LABEL: name: local_stack_alloc__v_add_co_u32_e32__vgpr_offsets_commute - ; GFX940: liveins: $vgpr0 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_MOV_B32 %stack.0 - ; GFX940-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], %vgpr_offset, implicit-def dead $vcc, implicit $exec - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] - ; GFX940-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], %vgpr_offset, implicit-def dead $vcc, implicit $exec - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] - ; GFX940-NEXT: SI_RETURN + ; GFX942-LABEL: name: local_stack_alloc__v_add_co_u32_e32__vgpr_offsets_commute + ; GFX942: liveins: $vgpr0 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_MOV_B32 %stack.0 + ; GFX942-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], %vgpr_offset, implicit-def dead $vcc, implicit $exec + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] + ; GFX942-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[S_MOV_B32_]], %vgpr_offset, implicit-def dead $vcc, implicit $exec + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] + ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__v_add_co_u32_e32__vgpr_offsets_commute ; GFX10: liveins: $vgpr0 @@ -820,15 +820,15 @@ body: | ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] ; GFX900-NEXT: SI_RETURN ; - ; GFX940-LABEL: name: local_stack_alloc__v_add_co_u32_e32__sgpr_offsets - ; GFX940: liveins: $sgpr8 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 - ; GFX940-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %sgpr_offset, %stack.0, implicit-def dead $vcc, implicit $exec - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] - ; GFX940-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %sgpr_offset, %stack.0, implicit-def dead $vcc, implicit $exec - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] - ; GFX940-NEXT: SI_RETURN + ; GFX942-LABEL: name: local_stack_alloc__v_add_co_u32_e32__sgpr_offsets + ; GFX942: liveins: $sgpr8 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 + ; GFX942-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %sgpr_offset, %stack.0, implicit-def dead $vcc, implicit $exec + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_]] + ; GFX942-NEXT: [[V_ADD_CO_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %sgpr_offset, %stack.0, implicit-def dead $vcc, implicit $exec + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e32_1]] + ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__v_add_co_u32_e32__sgpr_offsets ; GFX10: liveins: $sgpr8 @@ -896,18 +896,18 @@ body: | ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] ; GFX900-NEXT: SI_RETURN ; - ; GFX940-LABEL: name: local_stack_alloc__v_add_co_u32_e64__sgpr_offsets - ; GFX940: liveins: $sgpr8 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_MOV_B32 %stack.0 - ; GFX940-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX940-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %sgpr_offset, [[COPY]], 0, implicit $exec - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX940-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %sgpr_offset, [[COPY1]], 0, implicit $exec - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] - ; GFX940-NEXT: SI_RETURN + ; GFX942-LABEL: name: local_stack_alloc__v_add_co_u32_e64__sgpr_offsets + ; GFX942: liveins: $sgpr8 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_MOV_B32 %stack.0 + ; GFX942-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 + ; GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GFX942-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %sgpr_offset, [[COPY]], 0, implicit $exec + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GFX942-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %sgpr_offset, [[COPY1]], 0, implicit $exec + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] + ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__v_add_co_u32_e64__sgpr_offsets ; GFX10: liveins: $sgpr8 @@ -973,18 +973,18 @@ body: | ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] ; GFX900-NEXT: SI_RETURN ; - ; GFX940-LABEL: name: local_stack_alloc__v_add_co_u32_e64__sgpr_offsets_commute - ; GFX940: liveins: $sgpr8 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_MOV_B32 %stack.0 - ; GFX940-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 - ; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY %sgpr_offset - ; GFX940-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[S_MOV_B32_]], [[COPY]], 0, implicit $exec - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] - ; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY %sgpr_offset - ; GFX940-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[S_MOV_B32_]], [[COPY1]], 0, implicit $exec - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] - ; GFX940-NEXT: SI_RETURN + ; GFX942-LABEL: name: local_stack_alloc__v_add_co_u32_e64__sgpr_offsets_commute + ; GFX942: liveins: $sgpr8 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_MOV_B32 %stack.0 + ; GFX942-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 + ; GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY %sgpr_offset + ; GFX942-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[S_MOV_B32_]], [[COPY]], 0, implicit $exec + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_]] + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY %sgpr_offset + ; GFX942-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[S_MOV_B32_]], [[COPY1]], 0, implicit $exec + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_CO_U32_e64_2]] + ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__v_add_co_u32_e64__sgpr_offsets_commute ; GFX10: liveins: $sgpr8 diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx9.mir b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx9.mir index 7ed1531335177..e4c2d54d9894d 100644 --- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx9.mir +++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-add-references.gfx9.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=localstackalloc -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX900 %s -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -run-pass=localstackalloc -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX940 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -run-pass=localstackalloc -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX942 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -run-pass=localstackalloc -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -run-pass=localstackalloc -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX12 %s @@ -25,12 +25,12 @@ body: | ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] ; GFX900-NEXT: SI_RETURN ; - ; GFX940-LABEL: name: local_stack_alloc__v_add_u32_e32__literal_offsets - ; GFX940: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 256, %stack.0, implicit $exec - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] - ; GFX940-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 512, %stack.0, implicit $exec - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] - ; GFX940-NEXT: SI_RETURN + ; GFX942-LABEL: name: local_stack_alloc__v_add_u32_e32__literal_offsets + ; GFX942: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 256, %stack.0, implicit $exec + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] + ; GFX942-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 512, %stack.0, implicit $exec + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] + ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__v_add_u32_e32__literal_offsets ; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 256 @@ -77,12 +77,12 @@ body: | ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] ; GFX900-NEXT: SI_RETURN ; - ; GFX940-LABEL: name: local_stack_alloc__v_add_u32_e32__inline_imm_offsets - ; GFX940: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 8, %stack.0, implicit $exec - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] - ; GFX940-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 16, %stack.0, implicit $exec - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] - ; GFX940-NEXT: SI_RETURN + ; GFX942-LABEL: name: local_stack_alloc__v_add_u32_e32__inline_imm_offsets + ; GFX942: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 8, %stack.0, implicit $exec + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] + ; GFX942-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 16, %stack.0, implicit $exec + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] + ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__v_add_u32_e32__inline_imm_offsets ; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 8 @@ -129,12 +129,12 @@ body: | ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] ; GFX900-NEXT: SI_RETURN ; - ; GFX940-LABEL: name: local_stack_alloc__v_add_u32_e64__inline_imm_offsets - ; GFX940: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 8, 0, implicit $exec - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]] - ; GFX940-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 16, %stack.0, 0, implicit $exec - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] - ; GFX940-NEXT: SI_RETURN + ; GFX942-LABEL: name: local_stack_alloc__v_add_u32_e64__inline_imm_offsets + ; GFX942: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 8, 0, implicit $exec + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]] + ; GFX942-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 16, %stack.0, 0, implicit $exec + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] + ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__v_add_u32_e64__inline_imm_offsets ; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 8 @@ -183,15 +183,15 @@ body: | ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] ; GFX900-NEXT: SI_RETURN ; - ; GFX940-LABEL: name: local_stack_alloc__v_add_u32_e32__vgpr_offsets - ; GFX940: liveins: $vgpr0 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %vgpr_offset, %stack.0, implicit $exec - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] - ; GFX940-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %vgpr_offset, %stack.0, implicit $exec - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] - ; GFX940-NEXT: SI_RETURN + ; GFX942-LABEL: name: local_stack_alloc__v_add_u32_e32__vgpr_offsets + ; GFX942: liveins: $vgpr0 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %vgpr_offset, %stack.0, implicit $exec + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] + ; GFX942-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %vgpr_offset, %stack.0, implicit $exec + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] + ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__v_add_u32_e32__vgpr_offsets ; GFX10: liveins: $vgpr0 @@ -245,15 +245,15 @@ body: | ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] ; GFX900-NEXT: SI_RETURN ; - ; GFX940-LABEL: name: local_stack_alloc__v_add_u32_e32__vgpr_offsets_commute - ; GFX940: liveins: $vgpr0 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0 - ; GFX940-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %stack.0, %vgpr_offset, implicit $exec - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] - ; GFX940-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %stack.0, %vgpr_offset, implicit $exec - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] - ; GFX940-NEXT: SI_RETURN + ; GFX942-LABEL: name: local_stack_alloc__v_add_u32_e32__vgpr_offsets_commute + ; GFX942: liveins: $vgpr0 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: %vgpr_offset:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %stack.0, %vgpr_offset, implicit $exec + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] + ; GFX942-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %stack.0, %vgpr_offset, implicit $exec + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] + ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__v_add_u32_e32__vgpr_offsets_commute ; GFX10: liveins: $vgpr0 @@ -306,15 +306,15 @@ body: | ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] ; GFX900-NEXT: SI_RETURN ; - ; GFX940-LABEL: name: local_stack_alloc__v_add_u32_e32__sgpr_offsets - ; GFX940: liveins: $sgpr8 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 - ; GFX940-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %sgpr_offset, %stack.0, implicit $exec - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] - ; GFX940-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %sgpr_offset, %stack.0, implicit $exec - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] - ; GFX940-NEXT: SI_RETURN + ; GFX942-LABEL: name: local_stack_alloc__v_add_u32_e32__sgpr_offsets + ; GFX942: liveins: $sgpr8 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 + ; GFX942-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %sgpr_offset, %stack.0, implicit $exec + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_]] + ; GFX942-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %sgpr_offset, %stack.0, implicit $exec + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e32_1]] + ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__v_add_u32_e32__sgpr_offsets ; GFX10: liveins: $sgpr8 @@ -368,15 +368,15 @@ body: | ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] ; GFX900-NEXT: SI_RETURN ; - ; GFX940-LABEL: name: local_stack_alloc__v_add_u32_e64__sgpr_offsets - ; GFX940: liveins: $sgpr8 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 - ; GFX940-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %sgpr_offset, %stack.0, 0, implicit $exec - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]] - ; GFX940-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %sgpr_offset, %stack.0, 0, implicit $exec - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] - ; GFX940-NEXT: SI_RETURN + ; GFX942-LABEL: name: local_stack_alloc__v_add_u32_e64__sgpr_offsets + ; GFX942: liveins: $sgpr8 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 + ; GFX942-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %sgpr_offset, %stack.0, 0, implicit $exec + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]] + ; GFX942-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %sgpr_offset, %stack.0, 0, implicit $exec + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] + ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__v_add_u32_e64__sgpr_offsets ; GFX10: liveins: $sgpr8 @@ -430,15 +430,15 @@ body: | ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] ; GFX900-NEXT: SI_RETURN ; - ; GFX940-LABEL: name: local_stack_alloc__v_add_u32_e64__sgpr_offsets_commute - ; GFX940: liveins: $sgpr8 - ; GFX940-NEXT: {{ $}} - ; GFX940-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 - ; GFX940-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, %sgpr_offset, 0, implicit $exec - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]] - ; GFX940-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, %sgpr_offset, 0, implicit $exec - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] - ; GFX940-NEXT: SI_RETURN + ; GFX942-LABEL: name: local_stack_alloc__v_add_u32_e64__sgpr_offsets_commute + ; GFX942: liveins: $sgpr8 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: %sgpr_offset:sreg_32 = COPY $sgpr8 + ; GFX942-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, %sgpr_offset, 0, implicit $exec + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]] + ; GFX942-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, %sgpr_offset, 0, implicit $exec + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] + ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__v_add_u32_e64__sgpr_offsets_commute ; GFX10: liveins: $sgpr8 @@ -491,12 +491,12 @@ body: | ; GFX900-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] ; GFX900-NEXT: SI_RETURN ; - ; GFX940-LABEL: name: local_stack_alloc__v_add_u32_e64__inline_imm_offsets_clamp_modifier - ; GFX940: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 8, 1, implicit $exec - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]] - ; GFX940-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 16, %stack.0, 1, implicit $exec - ; GFX940-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] - ; GFX940-NEXT: SI_RETURN + ; GFX942-LABEL: name: local_stack_alloc__v_add_u32_e64__inline_imm_offsets_clamp_modifier + ; GFX942: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 8, 1, implicit $exec + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_]] + ; GFX942-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 16, %stack.0, 1, implicit $exec + ; GFX942-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:VGPR_32 */, [[V_ADD_U32_e64_1]] + ; GFX942-NEXT: SI_RETURN ; ; GFX10-LABEL: name: local_stack_alloc__v_add_u32_e64__inline_imm_offsets_clamp_modifier ; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 8 diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-nontemporal-metadata.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-nontemporal-metadata.ll index a62910e4e5711..b4bbe849c08b9 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-nontemporal-metadata.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-nontemporal-metadata.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn-amd-amdhsa -global-isel=0 -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9,GFX9-SDAG %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -global-isel=1 -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9,GFX9-GISEL %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -global-isel=0 -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX940,GFX940-SDAG %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -global-isel=1 -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX940,GFX940-GISEL %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -global-isel=0 -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX942,GFX942-SDAG %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -global-isel=1 -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX942,GFX942-GISEL %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -global-isel=0 -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10,GFX10-SDAG %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -global-isel=1 -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10,GFX10-GISEL %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -global-isel=0 -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11,GFX11-SDAG %s @@ -71,65 +71,65 @@ define amdgpu_kernel void @buffer_nontemporal_load_store(ptr addrspace(7) %in, p ; GFX9-GISEL-NEXT: buffer_store_dword v0, v1, s[4:7], 0 offen glc slc ; GFX9-GISEL-NEXT: s_endpgm ; -; GFX940-SDAG-LABEL: buffer_nontemporal_load_store: -; GFX940-SDAG: ; %bb.0: ; %entry -; GFX940-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GFX940-SDAG-NEXT: s_load_dword s13, s[4:5], 0x10 -; GFX940-SDAG-NEXT: s_mov_b32 s12, 0 -; GFX940-SDAG-NEXT: s_mov_b32 s7, s12 -; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-SDAG-NEXT: s_mov_b32 s6, s3 -; GFX940-SDAG-NEXT: s_or_b64 s[10:11], s[6:7], s[12:13] -; GFX940-SDAG-NEXT: s_mov_b32 s13, s2 -; GFX940-SDAG-NEXT: s_mov_b32 s2, s1 -; GFX940-SDAG-NEXT: s_mov_b32 s3, s12 -; GFX940-SDAG-NEXT: s_or_b64 s[8:9], s[2:3], s[12:13] -; GFX940-SDAG-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-SDAG-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen nt -; GFX940-SDAG-NEXT: s_load_dword s13, s[4:5], 0x30 -; GFX940-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x20 -; GFX940-SDAG-NEXT: s_mov_b32 s5, s12 -; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-SDAG-NEXT: s_mov_b32 s4, s3 -; GFX940-SDAG-NEXT: s_or_b64 s[6:7], s[4:5], s[12:13] -; GFX940-SDAG-NEXT: s_mov_b32 s13, s2 -; GFX940-SDAG-NEXT: s_mov_b32 s2, s1 -; GFX940-SDAG-NEXT: s_mov_b32 s3, s12 -; GFX940-SDAG-NEXT: s_or_b64 s[4:5], s[2:3], s[12:13] -; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX940-SDAG-NEXT: buffer_store_dword v0, v1, s[4:7], 0 offen sc0 nt sc1 -; GFX940-SDAG-NEXT: s_endpgm +; GFX942-SDAG-LABEL: buffer_nontemporal_load_store: +; GFX942-SDAG: ; %bb.0: ; %entry +; GFX942-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX942-SDAG-NEXT: s_load_dword s13, s[4:5], 0x10 +; GFX942-SDAG-NEXT: s_mov_b32 s12, 0 +; GFX942-SDAG-NEXT: s_mov_b32 s7, s12 +; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-SDAG-NEXT: s_mov_b32 s6, s3 +; GFX942-SDAG-NEXT: s_or_b64 s[10:11], s[6:7], s[12:13] +; GFX942-SDAG-NEXT: s_mov_b32 s13, s2 +; GFX942-SDAG-NEXT: s_mov_b32 s2, s1 +; GFX942-SDAG-NEXT: s_mov_b32 s3, s12 +; GFX942-SDAG-NEXT: s_or_b64 s[8:9], s[2:3], s[12:13] +; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-SDAG-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen nt +; GFX942-SDAG-NEXT: s_load_dword s13, s[4:5], 0x30 +; GFX942-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x20 +; GFX942-SDAG-NEXT: s_mov_b32 s5, s12 +; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-SDAG-NEXT: s_mov_b32 s4, s3 +; GFX942-SDAG-NEXT: s_or_b64 s[6:7], s[4:5], s[12:13] +; GFX942-SDAG-NEXT: s_mov_b32 s13, s2 +; GFX942-SDAG-NEXT: s_mov_b32 s2, s1 +; GFX942-SDAG-NEXT: s_mov_b32 s3, s12 +; GFX942-SDAG-NEXT: s_or_b64 s[4:5], s[2:3], s[12:13] +; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX942-SDAG-NEXT: buffer_store_dword v0, v1, s[4:7], 0 offen nt +; GFX942-SDAG-NEXT: s_endpgm ; -; GFX940-GISEL-LABEL: buffer_nontemporal_load_store: -; GFX940-GISEL: ; %bb.0: ; %entry -; GFX940-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GFX940-GISEL-NEXT: s_load_dword s11, s[4:5], 0x10 -; GFX940-GISEL-NEXT: s_mov_b32 s7, 0 -; GFX940-GISEL-NEXT: s_mov_b32 s8, s7 -; GFX940-GISEL-NEXT: s_mov_b32 s10, s7 -; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-GISEL-NEXT: s_mov_b32 s6, s1 -; GFX940-GISEL-NEXT: s_mov_b32 s9, s2 -; GFX940-GISEL-NEXT: s_or_b64 s[8:9], s[6:7], s[8:9] -; GFX940-GISEL-NEXT: s_mov_b32 s6, s3 -; GFX940-GISEL-NEXT: s_or_b64 s[10:11], s[6:7], s[10:11] -; GFX940-GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-GISEL-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen nt -; GFX940-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x20 -; GFX940-GISEL-NEXT: s_load_dword s9, s[4:5], 0x30 -; GFX940-GISEL-NEXT: s_mov_b32 s4, s7 -; GFX940-GISEL-NEXT: s_mov_b32 s8, s7 -; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-GISEL-NEXT: s_mov_b32 s6, s1 -; GFX940-GISEL-NEXT: s_mov_b32 s5, s2 -; GFX940-GISEL-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5] -; GFX940-GISEL-NEXT: s_mov_b32 s6, s3 -; GFX940-GISEL-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] -; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX940-GISEL-NEXT: buffer_store_dword v0, v1, s[4:7], 0 offen sc0 nt sc1 -; GFX940-GISEL-NEXT: s_endpgm +; GFX942-GISEL-LABEL: buffer_nontemporal_load_store: +; GFX942-GISEL: ; %bb.0: ; %entry +; GFX942-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX942-GISEL-NEXT: s_load_dword s11, s[4:5], 0x10 +; GFX942-GISEL-NEXT: s_mov_b32 s7, 0 +; GFX942-GISEL-NEXT: s_mov_b32 s8, s7 +; GFX942-GISEL-NEXT: s_mov_b32 s10, s7 +; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-GISEL-NEXT: s_mov_b32 s6, s1 +; GFX942-GISEL-NEXT: s_mov_b32 s9, s2 +; GFX942-GISEL-NEXT: s_or_b64 s[8:9], s[6:7], s[8:9] +; GFX942-GISEL-NEXT: s_mov_b32 s6, s3 +; GFX942-GISEL-NEXT: s_or_b64 s[10:11], s[6:7], s[10:11] +; GFX942-GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-GISEL-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen nt +; GFX942-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x20 +; GFX942-GISEL-NEXT: s_load_dword s9, s[4:5], 0x30 +; GFX942-GISEL-NEXT: s_mov_b32 s4, s7 +; GFX942-GISEL-NEXT: s_mov_b32 s8, s7 +; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-GISEL-NEXT: s_mov_b32 s6, s1 +; GFX942-GISEL-NEXT: s_mov_b32 s5, s2 +; GFX942-GISEL-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5] +; GFX942-GISEL-NEXT: s_mov_b32 s6, s3 +; GFX942-GISEL-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] +; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: buffer_store_dword v0, v1, s[4:7], 0 offen nt +; GFX942-GISEL-NEXT: s_endpgm ; ; GFX10-SDAG-LABEL: buffer_nontemporal_load_store: ; GFX10-SDAG: ; %bb.0: ; %entry @@ -399,65 +399,65 @@ define amdgpu_kernel void @buffer_nontemporal_and_volatile_load_store(ptr addrsp ; GFX9-GISEL-NEXT: buffer_store_dword v0, v1, s[4:7], 0 offen ; GFX9-GISEL-NEXT: s_endpgm ; -; GFX940-SDAG-LABEL: buffer_nontemporal_and_volatile_load_store: -; GFX940-SDAG: ; %bb.0: ; %entry -; GFX940-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GFX940-SDAG-NEXT: s_load_dword s13, s[4:5], 0x10 -; GFX940-SDAG-NEXT: s_mov_b32 s12, 0 -; GFX940-SDAG-NEXT: s_mov_b32 s7, s12 -; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-SDAG-NEXT: s_mov_b32 s6, s3 -; GFX940-SDAG-NEXT: s_or_b64 s[10:11], s[6:7], s[12:13] -; GFX940-SDAG-NEXT: s_mov_b32 s13, s2 -; GFX940-SDAG-NEXT: s_mov_b32 s2, s1 -; GFX940-SDAG-NEXT: s_mov_b32 s3, s12 -; GFX940-SDAG-NEXT: s_or_b64 s[8:9], s[2:3], s[12:13] -; GFX940-SDAG-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-SDAG-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen sc0 sc1 -; GFX940-SDAG-NEXT: s_load_dword s13, s[4:5], 0x30 -; GFX940-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x20 -; GFX940-SDAG-NEXT: s_mov_b32 s5, s12 -; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-SDAG-NEXT: s_mov_b32 s4, s3 -; GFX940-SDAG-NEXT: s_or_b64 s[6:7], s[4:5], s[12:13] -; GFX940-SDAG-NEXT: s_mov_b32 s13, s2 -; GFX940-SDAG-NEXT: s_mov_b32 s2, s1 -; GFX940-SDAG-NEXT: s_mov_b32 s3, s12 -; GFX940-SDAG-NEXT: s_or_b64 s[4:5], s[2:3], s[12:13] -; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX940-SDAG-NEXT: buffer_store_dword v0, v1, s[4:7], 0 offen sc0 sc1 -; GFX940-SDAG-NEXT: s_endpgm +; GFX942-SDAG-LABEL: buffer_nontemporal_and_volatile_load_store: +; GFX942-SDAG: ; %bb.0: ; %entry +; GFX942-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX942-SDAG-NEXT: s_load_dword s13, s[4:5], 0x10 +; GFX942-SDAG-NEXT: s_mov_b32 s12, 0 +; GFX942-SDAG-NEXT: s_mov_b32 s7, s12 +; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-SDAG-NEXT: s_mov_b32 s6, s3 +; GFX942-SDAG-NEXT: s_or_b64 s[10:11], s[6:7], s[12:13] +; GFX942-SDAG-NEXT: s_mov_b32 s13, s2 +; GFX942-SDAG-NEXT: s_mov_b32 s2, s1 +; GFX942-SDAG-NEXT: s_mov_b32 s3, s12 +; GFX942-SDAG-NEXT: s_or_b64 s[8:9], s[2:3], s[12:13] +; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-SDAG-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen sc0 sc1 +; GFX942-SDAG-NEXT: s_load_dword s13, s[4:5], 0x30 +; GFX942-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x20 +; GFX942-SDAG-NEXT: s_mov_b32 s5, s12 +; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-SDAG-NEXT: s_mov_b32 s4, s3 +; GFX942-SDAG-NEXT: s_or_b64 s[6:7], s[4:5], s[12:13] +; GFX942-SDAG-NEXT: s_mov_b32 s13, s2 +; GFX942-SDAG-NEXT: s_mov_b32 s2, s1 +; GFX942-SDAG-NEXT: s_mov_b32 s3, s12 +; GFX942-SDAG-NEXT: s_or_b64 s[4:5], s[2:3], s[12:13] +; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX942-SDAG-NEXT: buffer_store_dword v0, v1, s[4:7], 0 offen sc0 sc1 +; GFX942-SDAG-NEXT: s_endpgm ; -; GFX940-GISEL-LABEL: buffer_nontemporal_and_volatile_load_store: -; GFX940-GISEL: ; %bb.0: ; %entry -; GFX940-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GFX940-GISEL-NEXT: s_load_dword s11, s[4:5], 0x10 -; GFX940-GISEL-NEXT: s_mov_b32 s7, 0 -; GFX940-GISEL-NEXT: s_mov_b32 s8, s7 -; GFX940-GISEL-NEXT: s_mov_b32 s10, s7 -; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-GISEL-NEXT: s_mov_b32 s6, s1 -; GFX940-GISEL-NEXT: s_mov_b32 s9, s2 -; GFX940-GISEL-NEXT: s_or_b64 s[8:9], s[6:7], s[8:9] -; GFX940-GISEL-NEXT: s_mov_b32 s6, s3 -; GFX940-GISEL-NEXT: s_or_b64 s[10:11], s[6:7], s[10:11] -; GFX940-GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-GISEL-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen sc0 sc1 -; GFX940-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x20 -; GFX940-GISEL-NEXT: s_load_dword s9, s[4:5], 0x30 -; GFX940-GISEL-NEXT: s_mov_b32 s4, s7 -; GFX940-GISEL-NEXT: s_mov_b32 s8, s7 -; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-GISEL-NEXT: s_mov_b32 s6, s1 -; GFX940-GISEL-NEXT: s_mov_b32 s5, s2 -; GFX940-GISEL-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5] -; GFX940-GISEL-NEXT: s_mov_b32 s6, s3 -; GFX940-GISEL-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] -; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX940-GISEL-NEXT: buffer_store_dword v0, v1, s[4:7], 0 offen sc0 sc1 -; GFX940-GISEL-NEXT: s_endpgm +; GFX942-GISEL-LABEL: buffer_nontemporal_and_volatile_load_store: +; GFX942-GISEL: ; %bb.0: ; %entry +; GFX942-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX942-GISEL-NEXT: s_load_dword s11, s[4:5], 0x10 +; GFX942-GISEL-NEXT: s_mov_b32 s7, 0 +; GFX942-GISEL-NEXT: s_mov_b32 s8, s7 +; GFX942-GISEL-NEXT: s_mov_b32 s10, s7 +; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-GISEL-NEXT: s_mov_b32 s6, s1 +; GFX942-GISEL-NEXT: s_mov_b32 s9, s2 +; GFX942-GISEL-NEXT: s_or_b64 s[8:9], s[6:7], s[8:9] +; GFX942-GISEL-NEXT: s_mov_b32 s6, s3 +; GFX942-GISEL-NEXT: s_or_b64 s[10:11], s[6:7], s[10:11] +; GFX942-GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-GISEL-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen sc0 sc1 +; GFX942-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x20 +; GFX942-GISEL-NEXT: s_load_dword s9, s[4:5], 0x30 +; GFX942-GISEL-NEXT: s_mov_b32 s4, s7 +; GFX942-GISEL-NEXT: s_mov_b32 s8, s7 +; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-GISEL-NEXT: s_mov_b32 s6, s1 +; GFX942-GISEL-NEXT: s_mov_b32 s5, s2 +; GFX942-GISEL-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5] +; GFX942-GISEL-NEXT: s_mov_b32 s6, s3 +; GFX942-GISEL-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] +; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX942-GISEL-NEXT: buffer_store_dword v0, v1, s[4:7], 0 offen sc0 sc1 +; GFX942-GISEL-NEXT: s_endpgm ; ; GFX10-SDAG-LABEL: buffer_nontemporal_and_volatile_load_store: ; GFX10-SDAG: ; %bb.0: ; %entry @@ -672,4 +672,4 @@ entry: ; GFX11: {{.*}} ; GFX12: {{.*}} ; GFX9: {{.*}} -; GFX940: {{.*}} +; GFX942: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/lshl-add-u64.ll b/llvm/test/CodeGen/AMDGPU/lshl-add-u64.ll index e9a1b38eee157..4262cc44a6e74 100644 --- a/llvm/test/CodeGen/AMDGPU/lshl-add-u64.ll +++ b/llvm/test/CodeGen/AMDGPU/lshl-add-u64.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s define i64 @lshl_add_u64_v1v(i64 %v, i64 %a) { ; GCN-LABEL: lshl_add_u64_v1v: diff --git a/llvm/test/CodeGen/AMDGPU/madak.ll b/llvm/test/CodeGen/AMDGPU/madak.ll index 228a85058152b..73ca6f2b075c9 100644 --- a/llvm/test/CodeGen/AMDGPU/madak.ll +++ b/llvm/test/CodeGen/AMDGPU/madak.ll @@ -5,7 +5,7 @@ ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10-MAD %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-MAD %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefixes=GFX940-FMA %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefixes=GFX942-FMA %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefixes=GFX10-FMA %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefixes=GFX11-FMA %s @@ -101,19 +101,19 @@ define amdgpu_kernel void @madak_f32(ptr addrspace(1) noalias %out, ptr addrspac ; GFX11-MAD-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-MAD-NEXT: s_endpgm ; -; GFX940-FMA-LABEL: madak_f32: -; GFX940-FMA: ; %bb.0: -; GFX940-FMA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-FMA-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-FMA-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-FMA-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX940-FMA-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-FMA-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-FMA-NEXT: global_load_dword v2, v0, s[6:7] -; GFX940-FMA-NEXT: s_waitcnt vmcnt(0) -; GFX940-FMA-NEXT: v_fmaak_f32 v1, v1, v2, 0x41200000 -; GFX940-FMA-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-FMA-NEXT: s_endpgm +; GFX942-FMA-LABEL: madak_f32: +; GFX942-FMA: ; %bb.0: +; GFX942-FMA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-FMA-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-FMA-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-FMA-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX942-FMA-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-FMA-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-FMA-NEXT: global_load_dword v2, v0, s[6:7] +; GFX942-FMA-NEXT: s_waitcnt vmcnt(0) +; GFX942-FMA-NEXT: v_fmaak_f32 v1, v1, v2, 0x41200000 +; GFX942-FMA-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-FMA-NEXT: s_endpgm ; ; GFX10-FMA-LABEL: madak_f32: ; GFX10-FMA: ; %bb.0: @@ -281,26 +281,26 @@ define amdgpu_kernel void @madak_2_use_f32(ptr addrspace(1) noalias %out, ptr ad ; GFX11-MAD-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-MAD-NEXT: s_endpgm ; -; GFX940-FMA-LABEL: madak_2_use_f32: -; GFX940-FMA: ; %bb.0: -; GFX940-FMA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-FMA-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-FMA-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX940-FMA-NEXT: v_mov_b32_e32 v4, 0x41200000 -; GFX940-FMA-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-FMA-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 -; GFX940-FMA-NEXT: s_waitcnt vmcnt(0) -; GFX940-FMA-NEXT: global_load_dword v2, v0, s[2:3] offset:4 sc0 sc1 -; GFX940-FMA-NEXT: s_waitcnt vmcnt(0) -; GFX940-FMA-NEXT: global_load_dword v3, v0, s[2:3] offset:8 sc0 sc1 -; GFX940-FMA-NEXT: s_waitcnt vmcnt(0) -; GFX940-FMA-NEXT: v_fmaak_f32 v2, v1, v2, 0x41200000 -; GFX940-FMA-NEXT: v_fmac_f32_e32 v4, v1, v3 -; GFX940-FMA-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-FMA-NEXT: s_waitcnt vmcnt(0) -; GFX940-FMA-NEXT: global_store_dword v0, v4, s[2:3] offset:4 sc0 sc1 -; GFX940-FMA-NEXT: s_waitcnt vmcnt(0) -; GFX940-FMA-NEXT: s_endpgm +; GFX942-FMA-LABEL: madak_2_use_f32: +; GFX942-FMA: ; %bb.0: +; GFX942-FMA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-FMA-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-FMA-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX942-FMA-NEXT: v_mov_b32_e32 v4, 0x41200000 +; GFX942-FMA-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-FMA-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 +; GFX942-FMA-NEXT: s_waitcnt vmcnt(0) +; GFX942-FMA-NEXT: global_load_dword v2, v0, s[2:3] offset:4 sc0 sc1 +; GFX942-FMA-NEXT: s_waitcnt vmcnt(0) +; GFX942-FMA-NEXT: global_load_dword v3, v0, s[2:3] offset:8 sc0 sc1 +; GFX942-FMA-NEXT: s_waitcnt vmcnt(0) +; GFX942-FMA-NEXT: v_fmaak_f32 v2, v1, v2, 0x41200000 +; GFX942-FMA-NEXT: v_fmac_f32_e32 v4, v1, v3 +; GFX942-FMA-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 +; GFX942-FMA-NEXT: s_waitcnt vmcnt(0) +; GFX942-FMA-NEXT: global_store_dword v0, v4, s[2:3] offset:4 sc0 sc1 +; GFX942-FMA-NEXT: s_waitcnt vmcnt(0) +; GFX942-FMA-NEXT: s_endpgm ; ; GFX10-FMA-LABEL: madak_2_use_f32: ; GFX10-FMA: ; %bb.0: @@ -434,17 +434,17 @@ define amdgpu_kernel void @madak_m_inline_imm_f32(ptr addrspace(1) noalias %out, ; GFX11-MAD-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-MAD-NEXT: s_endpgm ; -; GFX940-FMA-LABEL: madak_m_inline_imm_f32: -; GFX940-FMA: ; %bb.0: -; GFX940-FMA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-FMA-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-FMA-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX940-FMA-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-FMA-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-FMA-NEXT: s_waitcnt vmcnt(0) -; GFX940-FMA-NEXT: v_fmaak_f32 v1, 4.0, v1, 0x41200000 -; GFX940-FMA-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-FMA-NEXT: s_endpgm +; GFX942-FMA-LABEL: madak_m_inline_imm_f32: +; GFX942-FMA: ; %bb.0: +; GFX942-FMA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-FMA-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-FMA-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX942-FMA-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-FMA-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-FMA-NEXT: s_waitcnt vmcnt(0) +; GFX942-FMA-NEXT: v_fmaak_f32 v1, 4.0, v1, 0x41200000 +; GFX942-FMA-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-FMA-NEXT: s_endpgm ; ; GFX10-FMA-LABEL: madak_m_inline_imm_f32: ; GFX10-FMA: ; %bb.0: @@ -572,19 +572,19 @@ define amdgpu_kernel void @madak_inline_imm_f32(ptr addrspace(1) noalias %out, p ; GFX11-MAD-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-MAD-NEXT: s_endpgm ; -; GFX940-FMA-LABEL: madak_inline_imm_f32: -; GFX940-FMA: ; %bb.0: -; GFX940-FMA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-FMA-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-FMA-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-FMA-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX940-FMA-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-FMA-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-FMA-NEXT: global_load_dword v2, v0, s[6:7] -; GFX940-FMA-NEXT: s_waitcnt vmcnt(0) -; GFX940-FMA-NEXT: v_fma_f32 v1, v1, v2, 4.0 -; GFX940-FMA-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-FMA-NEXT: s_endpgm +; GFX942-FMA-LABEL: madak_inline_imm_f32: +; GFX942-FMA: ; %bb.0: +; GFX942-FMA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-FMA-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-FMA-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-FMA-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX942-FMA-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-FMA-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-FMA-NEXT: global_load_dword v2, v0, s[6:7] +; GFX942-FMA-NEXT: s_waitcnt vmcnt(0) +; GFX942-FMA-NEXT: v_fma_f32 v1, v1, v2, 4.0 +; GFX942-FMA-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-FMA-NEXT: s_endpgm ; ; GFX10-FMA-LABEL: madak_inline_imm_f32: ; GFX10-FMA: ; %bb.0: @@ -711,19 +711,19 @@ define amdgpu_kernel void @s_v_madak_f32(ptr addrspace(1) noalias %out, ptr addr ; GFX11-MAD-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-MAD-NEXT: s_endpgm ; -; GFX940-FMA-LABEL: s_v_madak_f32: -; GFX940-FMA: ; %bb.0: -; GFX940-FMA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-FMA-NEXT: s_load_dword s6, s[4:5], 0x34 -; GFX940-FMA-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-FMA-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX940-FMA-NEXT: v_mov_b32_e32 v2, 0x41200000 -; GFX940-FMA-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-FMA-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-FMA-NEXT: s_waitcnt vmcnt(0) -; GFX940-FMA-NEXT: v_fmac_f32_e32 v2, s6, v1 -; GFX940-FMA-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-FMA-NEXT: s_endpgm +; GFX942-FMA-LABEL: s_v_madak_f32: +; GFX942-FMA: ; %bb.0: +; GFX942-FMA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-FMA-NEXT: s_load_dword s6, s[4:5], 0x34 +; GFX942-FMA-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-FMA-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX942-FMA-NEXT: v_mov_b32_e32 v2, 0x41200000 +; GFX942-FMA-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-FMA-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-FMA-NEXT: s_waitcnt vmcnt(0) +; GFX942-FMA-NEXT: v_fmac_f32_e32 v2, s6, v1 +; GFX942-FMA-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-FMA-NEXT: s_endpgm ; ; GFX10-FMA-LABEL: s_v_madak_f32: ; GFX10-FMA: ; %bb.0: @@ -848,20 +848,20 @@ define amdgpu_kernel void @v_s_madak_f32(ptr addrspace(1) noalias %out, float %a ; GFX11-MAD-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-MAD-NEXT: s_endpgm ; -; GFX940-FMA-LABEL: v_s_madak_f32: -; GFX940-FMA: ; %bb.0: -; GFX940-FMA-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x34 -; GFX940-FMA-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-FMA-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX940-FMA-NEXT: v_mov_b32_e32 v2, 0x41200000 -; GFX940-FMA-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-FMA-NEXT: global_load_dword v1, v0, s[0:1] -; GFX940-FMA-NEXT: s_load_dword s2, s[4:5], 0x2c -; GFX940-FMA-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX940-FMA-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-FMA-NEXT: v_fmac_f32_e32 v2, s2, v1 -; GFX940-FMA-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-FMA-NEXT: s_endpgm +; GFX942-FMA-LABEL: v_s_madak_f32: +; GFX942-FMA: ; %bb.0: +; GFX942-FMA-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x34 +; GFX942-FMA-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-FMA-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX942-FMA-NEXT: v_mov_b32_e32 v2, 0x41200000 +; GFX942-FMA-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-FMA-NEXT: global_load_dword v1, v0, s[0:1] +; GFX942-FMA-NEXT: s_load_dword s2, s[4:5], 0x2c +; GFX942-FMA-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX942-FMA-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-FMA-NEXT: v_fmac_f32_e32 v2, s2, v1 +; GFX942-FMA-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-FMA-NEXT: s_endpgm ; ; GFX10-FMA-LABEL: v_s_madak_f32: ; GFX10-FMA: ; %bb.0: @@ -963,16 +963,16 @@ define amdgpu_kernel void @s_s_madak_f32(ptr addrspace(1) %out, float %a, float ; GFX11-MAD-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11-MAD-NEXT: s_endpgm ; -; GFX940-FMA-LABEL: s_s_madak_f32: -; GFX940-FMA: ; %bb.0: -; GFX940-FMA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-FMA-NEXT: v_mov_b32_e32 v1, 0x41200000 -; GFX940-FMA-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-FMA-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-FMA-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-FMA-NEXT: v_fmac_f32_e32 v1, s2, v2 -; GFX940-FMA-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-FMA-NEXT: s_endpgm +; GFX942-FMA-LABEL: s_s_madak_f32: +; GFX942-FMA: ; %bb.0: +; GFX942-FMA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-FMA-NEXT: v_mov_b32_e32 v1, 0x41200000 +; GFX942-FMA-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-FMA-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-FMA-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-FMA-NEXT: v_fmac_f32_e32 v1, s2, v2 +; GFX942-FMA-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-FMA-NEXT: s_endpgm ; ; GFX10-FMA-LABEL: s_s_madak_f32: ; GFX10-FMA: ; %bb.0: @@ -1091,20 +1091,20 @@ define amdgpu_kernel void @no_madak_src0_modifier_f32(ptr addrspace(1) noalias % ; GFX11-MAD-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-MAD-NEXT: s_endpgm ; -; GFX940-FMA-LABEL: no_madak_src0_modifier_f32: -; GFX940-FMA: ; %bb.0: -; GFX940-FMA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-FMA-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-FMA-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-FMA-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX940-FMA-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-FMA-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-FMA-NEXT: global_load_dword v2, v0, s[6:7] -; GFX940-FMA-NEXT: s_mov_b32 s2, 0x41200000 -; GFX940-FMA-NEXT: s_waitcnt vmcnt(0) -; GFX940-FMA-NEXT: v_fma_f32 v1, |v1|, v2, s2 -; GFX940-FMA-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-FMA-NEXT: s_endpgm +; GFX942-FMA-LABEL: no_madak_src0_modifier_f32: +; GFX942-FMA: ; %bb.0: +; GFX942-FMA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-FMA-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-FMA-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-FMA-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX942-FMA-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-FMA-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-FMA-NEXT: global_load_dword v2, v0, s[6:7] +; GFX942-FMA-NEXT: s_mov_b32 s2, 0x41200000 +; GFX942-FMA-NEXT: s_waitcnt vmcnt(0) +; GFX942-FMA-NEXT: v_fma_f32 v1, |v1|, v2, s2 +; GFX942-FMA-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-FMA-NEXT: s_endpgm ; ; GFX10-FMA-LABEL: no_madak_src0_modifier_f32: ; GFX10-FMA: ; %bb.0: @@ -1245,20 +1245,20 @@ define amdgpu_kernel void @no_madak_src1_modifier_f32(ptr addrspace(1) noalias % ; GFX11-MAD-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-MAD-NEXT: s_endpgm ; -; GFX940-FMA-LABEL: no_madak_src1_modifier_f32: -; GFX940-FMA: ; %bb.0: -; GFX940-FMA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX940-FMA-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX940-FMA-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-FMA-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX940-FMA-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-FMA-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-FMA-NEXT: global_load_dword v2, v0, s[6:7] -; GFX940-FMA-NEXT: s_mov_b32 s2, 0x41200000 -; GFX940-FMA-NEXT: s_waitcnt vmcnt(0) -; GFX940-FMA-NEXT: v_fma_f32 v1, v1, |v2|, s2 -; GFX940-FMA-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-FMA-NEXT: s_endpgm +; GFX942-FMA-LABEL: no_madak_src1_modifier_f32: +; GFX942-FMA: ; %bb.0: +; GFX942-FMA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX942-FMA-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX942-FMA-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-FMA-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX942-FMA-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-FMA-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-FMA-NEXT: global_load_dword v2, v0, s[6:7] +; GFX942-FMA-NEXT: s_mov_b32 s2, 0x41200000 +; GFX942-FMA-NEXT: s_waitcnt vmcnt(0) +; GFX942-FMA-NEXT: v_fma_f32 v1, v1, |v2|, s2 +; GFX942-FMA-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-FMA-NEXT: s_endpgm ; ; GFX10-FMA-LABEL: no_madak_src1_modifier_f32: ; GFX10-FMA: ; %bb.0: @@ -1427,27 +1427,27 @@ define amdgpu_kernel void @madak_constant_bus_violation(i32 %arg1, [8 x i32], fl ; GFX11-MAD-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-MAD-NEXT: s_endpgm ; -; GFX940-FMA-LABEL: madak_constant_bus_violation: -; GFX940-FMA: ; %bb.0: ; %bb -; GFX940-FMA-NEXT: s_load_dword s0, s[4:5], 0x24 -; GFX940-FMA-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-FMA-NEXT: s_cmp_lg_u32 s0, 0 -; GFX940-FMA-NEXT: s_cbranch_scc1 .LBB9_2 -; GFX940-FMA-NEXT: ; %bb.1: ; %bb3 -; GFX940-FMA-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-FMA-NEXT: global_store_dword v[0:1], v0, off sc0 sc1 -; GFX940-FMA-NEXT: s_waitcnt vmcnt(0) -; GFX940-FMA-NEXT: .LBB9_2: ; %bb4 -; GFX940-FMA-NEXT: global_load_dword v0, v[0:1], off sc0 sc1 -; GFX940-FMA-NEXT: s_waitcnt vmcnt(0) -; GFX940-FMA-NEXT: s_load_dword s0, s[4:5], 0x48 -; GFX940-FMA-NEXT: v_mov_b32_e32 v1, 0x42280000 -; GFX940-FMA-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-FMA-NEXT: v_fmac_f32_e64 v1, s0, 0.5 -; GFX940-FMA-NEXT: v_mul_f32_e32 v0, v1, v0 -; GFX940-FMA-NEXT: global_store_dword v[0:1], v0, off sc0 sc1 -; GFX940-FMA-NEXT: s_waitcnt vmcnt(0) -; GFX940-FMA-NEXT: s_endpgm +; GFX942-FMA-LABEL: madak_constant_bus_violation: +; GFX942-FMA: ; %bb.0: ; %bb +; GFX942-FMA-NEXT: s_load_dword s0, s[4:5], 0x24 +; GFX942-FMA-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-FMA-NEXT: s_cmp_lg_u32 s0, 0 +; GFX942-FMA-NEXT: s_cbranch_scc1 .LBB9_2 +; GFX942-FMA-NEXT: ; %bb.1: ; %bb3 +; GFX942-FMA-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-FMA-NEXT: global_store_dword v[0:1], v0, off sc0 sc1 +; GFX942-FMA-NEXT: s_waitcnt vmcnt(0) +; GFX942-FMA-NEXT: .LBB9_2: ; %bb4 +; GFX942-FMA-NEXT: global_load_dword v0, v[0:1], off sc0 sc1 +; GFX942-FMA-NEXT: s_waitcnt vmcnt(0) +; GFX942-FMA-NEXT: s_load_dword s0, s[4:5], 0x48 +; GFX942-FMA-NEXT: v_mov_b32_e32 v1, 0x42280000 +; GFX942-FMA-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-FMA-NEXT: v_fmac_f32_e64 v1, s0, 0.5 +; GFX942-FMA-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX942-FMA-NEXT: global_store_dword v[0:1], v0, off sc0 sc1 +; GFX942-FMA-NEXT: s_waitcnt vmcnt(0) +; GFX942-FMA-NEXT: s_endpgm ; ; GFX10-FMA-LABEL: madak_constant_bus_violation: ; GFX10-FMA: ; %bb.0: ; %bb diff --git a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx942.mir similarity index 98% rename from llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir rename to llvm/test/CodeGen/AMDGPU/mai-hazards-gfx942.mir index 0af37ad8c896e..d029043f90a85 100644 --- a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir +++ b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx942.mir @@ -1,4 +1,4 @@ -# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,GFX940 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,GFX942 %s # RUN: llc -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,GFX950 %s # GCN-LABEL: name: valu_write_vgpr_sgemm_mfma_read @@ -145,7 +145,7 @@ body: | ... # GCN-LABEL: name: sgemm4x4_mfma_write_agpr_mfma_read_overlap # GCN: V_MFMA -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_MFMA name: sgemm4x4_mfma_write_agpr_mfma_read_overlap @@ -166,7 +166,7 @@ body: | ... # GCN-LABEL: name: sgemm4x4_mfma_write_agpr_smfmac_read_overlap # GCN: V_MFMA -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_SMFMAC name: sgemm4x4_mfma_write_agpr_smfmac_read_overlap @@ -177,8 +177,8 @@ body: | ... # GCN-LABEL: name: xdl_sgemm16x16_mfma_write_agpr_mfma_read_overlap # GCN: V_MFMA -# GFX940-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 0 +# GFX942-NEXT: S_NOP 7 +# GFX942-NEXT: S_NOP 0 # GFX950-NEXT: S_NOP 7 # GFX950-NEXT: S_NOP 1 @@ -191,8 +191,8 @@ body: | ... # GCN-LABEL: name: xdl_sgemm16x16_mfma_write_vgpr_mfma_read_overlap # GCN: V_MFMA -# GFX940-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 0 +# GFX942-NEXT: S_NOP 7 +# GFX942-NEXT: S_NOP 0 # GFX950-NEXT: S_NOP 7 # GFX950-NEXT: S_NOP 1 @@ -224,8 +224,8 @@ body: | ... # GCN-LABEL: name: xdl_sgemm16x16_mfma_write_agpr_smfmac_read_overlap # GCN: V_MFMA -# GFX940-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 0 +# GFX942-NEXT: S_NOP 7 +# GFX942-NEXT: S_NOP 0 # GFX950-NEXT: S_NOP 7 # GFX950-NEXT: S_NOP 1 @@ -240,7 +240,7 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 0 +# GFX942-NEXT: S_NOP 0 # GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_sgemm32x32_mfma_write_agpr_mfma_read_overlap @@ -253,7 +253,7 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 0 +# GFX942-NEXT: S_NOP 0 # GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_sgemm32x32_mfma_write_vgpr_mfma_read_overlap @@ -286,7 +286,7 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 0 +# GFX942-NEXT: S_NOP 0 # GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_SMFMAC name: xdl_sgemm32x32_mfma_write_agpr_smfmac_read_overlap @@ -297,8 +297,8 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfma_read_overlap # GCN: V_MFMA -# GFX940-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 0 +# GFX942-NEXT: S_NOP 7 +# GFX942-NEXT: S_NOP 0 # GFX950-NEXT: S_NOP 7 # GFX950-NEXT: S_NOP 7 @@ -322,8 +322,8 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_read_overlap # GCN: V_MFMA -# GFX940-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 0 +# GFX942-NEXT: S_NOP 7 +# GFX942-NEXT: S_NOP 0 # GFX950-NEXT: S_NOP 7 # GFX950-NEXT: S_NOP 7 @@ -358,7 +358,7 @@ body: | # GCN-LABEL: name: xdl_sgemm16x16_mfma_write_vgpr_dgemm_mfma_read_overlap # GCN: V_MFMA # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 0 +# GFX942-NEXT: S_NOP 0 # GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_sgemm16x16_mfma_write_vgpr_dgemm_mfma_read_overlap @@ -371,7 +371,7 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 0 +# GFX942-NEXT: S_NOP 0 # GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_sgemm32x32_mfma_write_vgpr_dgemm_mfma_read_overlap @@ -383,7 +383,7 @@ body: | # GCN-LABEL: name: xdl_sgemm16x16_mfma_write_agpr_mfma_read_partial # GCN: V_MFMA # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 0 +# GFX942-NEXT: S_NOP 0 # GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_sgemm16x16_mfma_write_agpr_mfma_read_partial @@ -395,7 +395,7 @@ body: | # GCN-LABEL: name: xdl_sgemm16x16_mfma_write_vgpr_mfma_read_partial # GCN: V_MFMA # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 0 +# GFX942-NEXT: S_NOP 0 # GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_sgemm16x16_mfma_write_vgpr_mfma_read_partial @@ -417,7 +417,7 @@ body: | # GCN-LABEL: name: xdl_sgemm16x16_mfma_write_agpr_mfma_srca_read_overlap # GCN: V_MFMA # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_MFMA name: xdl_sgemm16x16_mfma_write_agpr_mfma_srca_read_overlap @@ -440,7 +440,7 @@ body: | # GCN-LABEL: name: smfmac32x32_write_agpr_mfma_srca_read_overlap # GCN: V_SMFMAC # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_MFMA name: smfmac32x32_write_agpr_mfma_srca_read_overlap @@ -452,7 +452,7 @@ body: | # GCN-LABEL: name: smfmac32x32_write_agpr_smfmac_srcc_read_overlap # GCN: V_SMFMAC # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_SMFMAC name: smfmac32x32_write_agpr_smfmac_srcc_read_overlap @@ -465,7 +465,7 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_MFMA name: xdl_sgemm32x32_mfma_write_agpr_mfma_srca_read_overlap @@ -538,8 +538,8 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfma_srca_read_overlap # GCN: V_MFMA -# GFX940-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 7 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 7 # GFX950-NEXT: S_NOP 7 @@ -563,8 +563,8 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_srca_read_overlap # GCN: V_MFMA -# GFX940-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 7 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 7 # GFX950-NEXT: S_NOP 7 @@ -638,8 +638,8 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfma_srcb_read_overlap # GCN: V_MFMA -# GFX940-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 7 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 7 # GFX950-NEXT: S_NOP 7 @@ -653,8 +653,8 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_smfmac_srcb_read_overlap # GCN: V_MFMA -# GFX940-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 7 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 7 # GFX950-NEXT: S_NOP 7 @@ -668,8 +668,8 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_smfmac_srcc_read_overlap # GCN: V_MFMA -# GFX940-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 7 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 7 # GFX950-NEXT: S_NOP 7 @@ -734,7 +734,7 @@ body: | ... # GCN-LABEL: name: smfmac16x16_write_vgpr_flat_read # GCN: V_SMFMAC -# GFX940-NEXT: S_NOP 6 +# GFX942-NEXT: S_NOP 6 # GFX950-NEXT: S_NOP 7 # GCN-NEXT: FLAT_STORE_DWORD name: smfmac16x16_write_vgpr_flat_read @@ -746,7 +746,7 @@ body: | # GCN-LABEL: name: xdl_smfma16x16_write_vgpr_flat_read # GCN: V_MFMA # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 3 # GCN-NEXT: FLAT_STORE_DWORD name: xdl_smfma16x16_write_vgpr_flat_read @@ -758,7 +758,7 @@ body: | # GCN-LABEL: name: smfmac32x32_write_vgpr_flat_read # GCN: V_SMFMAC # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 3 # GCN-NEXT: FLAT_STORE_DWORD name: smfmac32x32_write_vgpr_flat_read @@ -771,7 +771,7 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 3 # GCN-NEXT: FLAT_STORE_DWORD name: xdl_smfma32x32_write_vgpr_flat_read @@ -827,7 +827,7 @@ body: | # GCN-LABEL: name: xdl_smfma16x16_write_vgpr_valu_read # GCN: V_MFMA # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_MOV_B32 name: xdl_smfma16x16_write_vgpr_valu_read @@ -840,7 +840,7 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_MOV_B32 name: xdl_smfma32x32_write_vgpr_valu_read @@ -861,8 +861,8 @@ body: | ... # GCN-LABEL: name: dmfma16x16_write_vgpr_valu_read # GCN: V_MFMA -# GFX940-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 7 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 7 # GFX950-NEXT: S_NOP 7 @@ -887,7 +887,7 @@ body: | # GCN-LABEL: name: xdl_smfma16x16_write_vgpr_accv_read # GCN: V_MFMA # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_ACCVGPR_WRITE_B32_e64 name: xdl_smfma16x16_write_vgpr_accv_read @@ -900,7 +900,7 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_ACCVGPR_WRITE_B32_e64 name: xdl_smfma32x32_write_vgpr_accv_read @@ -931,8 +931,8 @@ body: | ... # GCN-LABEL: name: dmfma16x16_write_vgpr_dot_read # GCN: V_MFMA -# GFX940-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 7 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 7 # GFX950-NEXT: S_NOP 7 @@ -958,7 +958,7 @@ body: | # GCN-LABEL: name: xdl_smfma16x16_write_vgpr_valu_write # GCN: V_MFMA # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_MOV_B32 name: xdl_smfma16x16_write_vgpr_valu_write @@ -971,7 +971,7 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_MOV_B32 name: xdl_smfma32x32_write_vgpr_valu_write @@ -993,7 +993,7 @@ body: | # GCN-LABEL: name: xdl_smfma16x16_write_vgpr_valu_f16_write # GCN: V_MFMA # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_FMA_F16_e64 name: xdl_smfma16x16_write_vgpr_valu_f16_write @@ -1006,7 +1006,7 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_FMA_F16_e64 name: xdl_smfma32x32_write_vgpr_valu_f16_write @@ -1028,7 +1028,7 @@ body: | # GCN-LABEL: name: xdl_smfma16x16_write_vgpr_valu_sdwa_write # GCN: V_MFMA # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_MOV_B32_sdwa name: xdl_smfma16x16_write_vgpr_valu_sdwa_write @@ -1041,7 +1041,7 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_MOV_B32_sdwa name: xdl_smfma32x32_write_vgpr_valu_sdwa_write @@ -1378,8 +1378,8 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_agpr_mfma_read_overlap # GCN: V_MFMA -# GFX940-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 0 +# GFX942-NEXT: S_NOP 7 +# GFX942-NEXT: S_NOP 0 # GFX950-NEXT: S_NOP 7 # GFX950-NEXT: S_NOP 7 @@ -1403,8 +1403,8 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_agpr_sgemm_mfma_read_overlap # GCN: V_MFMA -# GFX940-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 0 +# GFX942-NEXT: S_NOP 7 +# GFX942-NEXT: S_NOP 0 # GFX950-NEXT: S_NOP 7 # GFX950-NEXT: S_NOP 7 @@ -1430,7 +1430,7 @@ body: | # GCN-LABEL: name: xdl_sgemm16x16_mfma_write_sgpr_dgemm_mfma_read_overlap # GCN: V_MFMA # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 0 +# GFX942-NEXT: S_NOP 0 # GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_sgemm16x16_mfma_write_sgpr_dgemm_mfma_read_overlap @@ -1443,7 +1443,7 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 0 +# GFX942-NEXT: S_NOP 0 # GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_sgemm32x32_mfma_write_agpr_dgemm_mfma_read_overlap @@ -1484,8 +1484,8 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_agpr_mfma_srca_read_overlap # GCN: V_MFMA -# GFX940-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 7 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 7 # GFX950-NEXT: S_NOP 7 @@ -1509,8 +1509,8 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_agpr_sgemm_mfma_srca_read_overlap # GCN: V_MFMA -# GFX940-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 7 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 7 # GFX950-NEXT: S_NOP 7 @@ -1545,8 +1545,8 @@ body: | ... # GCN-LABEL: name: dgemm16x16_mfma_write_agpr_mfma_srcb_read_overlap # GCN: V_MFMA -# GFX940-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 7 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 7 # GFX950-NEXT: S_NOP 7 @@ -1604,8 +1604,8 @@ body: | ... # GCN-LABEL: name: dmfma16x16_write_agpr_valu_read # GCN: V_MFMA -# GFX940-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 7 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 7 # GFX950-NEXT: S_NOP 7 @@ -1678,7 +1678,7 @@ body: | ... # GCN-LABEL: name: sgemm16X16X16_mfma_write_agpr_mfma_read_overlap # GCN: V_MFMA -# GFX940-NEXT: S_NOP 4 +# GFX942-NEXT: S_NOP 4 # GFX950-NEXT: S_NOP 5 # GCN-NEXT: V_MFMA name: sgemm16X16X16_mfma_write_agpr_mfma_read_overlap @@ -1689,7 +1689,7 @@ body: | ... # GCN-LABEL: name: sgemm16X16X32_mfma_write_agpr_mfma_read_overlap # GCN: V_MFMA -# GFX940-NEXT: S_NOP 4 +# GFX942-NEXT: S_NOP 4 # GFX950-NEXT: S_NOP 5 # GCN-NEXT: V_MFMA name: sgemm16X16X32_mfma_write_agpr_mfma_read_overlap @@ -1700,7 +1700,7 @@ body: | ... # GCN-LABEL: name: sgemm16X16X16_mfma_write_agpr_dgemm_read_overlap # GCN: V_MFMA -# GFX940-NEXT: S_NOP 4 +# GFX942-NEXT: S_NOP 4 # GFX950-NEXT: S_NOP 5 # GCN-NEXT: V_MFMA name: sgemm16X16X16_mfma_write_agpr_dgemm_read_overlap @@ -1711,7 +1711,7 @@ body: | ... # GCN-LABEL: name: sgemm16X16X16_mfma_write_agpr_smfmac_read_overlap # GCN: V_MFMA -# GFX940-NEXT: S_NOP 4 +# GFX942-NEXT: S_NOP 4 # GFX950-NEXT: S_NOP 5 # GCN-NEXT: V_SMFMAC name: sgemm16X16X16_mfma_write_agpr_smfmac_read_overlap @@ -1722,7 +1722,7 @@ body: | ... # GCN-LABEL: name: smfmac16x16_write_agpr_smfmac_read_overlap # GCN: V_SMFMAC -# GFX940-NEXT: S_NOP 4 +# GFX942-NEXT: S_NOP 4 # GFX950-NEXT: S_NOP 5 # GCN-NEXT: V_SMFMAC name: smfmac16x16_write_agpr_smfmac_read_overlap @@ -1733,7 +1733,7 @@ body: | ... # GCN-LABEL: name: xdl_sgemm16X16X16_mfma_write_agpr_mfma_srca_read_overlap # GCN: V_MFMA -# GFX940-NEXT: S_NOP 6 +# GFX942-NEXT: S_NOP 6 # GFX950-NEXT: S_NOP 7 # GCN-NEXT: V_MFMA name: xdl_sgemm16X16X16_mfma_write_agpr_mfma_srca_read_overlap @@ -1744,7 +1744,7 @@ body: | ... # GCN-LABEL: name: xdl_sgemm16X16X32_mfma_write_agpr_mfma_srcb_read_overlap # GCN: V_MFMA -# GFX940-NEXT: S_NOP 6 +# GFX942-NEXT: S_NOP 6 # GFX950-NEXT: S_NOP 7 # GCN-NEXT: V_MFMA name: xdl_sgemm16X16X32_mfma_write_agpr_mfma_srcb_read_overlap @@ -1755,7 +1755,7 @@ body: | ... # GCN-LABEL: name: xdl_sgemm16X16X16_mfma_write_vgpr_dmfma16x16_srca_read_overlap # GCN: V_MFMA -# GFX940-NEXT: S_NOP 6 +# GFX942-NEXT: S_NOP 6 # GFX950-NEXT: S_NOP 7 # GCN-NEXT: V_MFMA name: xdl_sgemm16X16X16_mfma_write_vgpr_dmfma16x16_srca_read_overlap @@ -1766,7 +1766,7 @@ body: | ... # GCN-LABEL: name: xdl_sgemm16X16X16_mfma_write_vgpr_valu_write # GCN: V_MFMA -# GFX940-NEXT: S_NOP 6 +# GFX942-NEXT: S_NOP 6 # GFX950-NEXT: S_NOP 7 # GCN-NEXT: V_MOV_B32 name: xdl_sgemm16X16X16_mfma_write_vgpr_valu_write @@ -1777,7 +1777,7 @@ body: | ... # GCN-LABEL: name: xdl_sgemm16X16X16_mfma_write_vgpr_vm_read # GCN: V_MFMA -# GFX940-NEXT: S_NOP 6 +# GFX942-NEXT: S_NOP 6 # GFX950-NEXT: S_NOP 7 # GCN-NEXT: BUFFER_STORE_DWORD name: xdl_sgemm16X16X16_mfma_write_vgpr_vm_read @@ -1788,7 +1788,7 @@ body: | ... # GCN-LABEL: name: xdl_sgemm16X16X16_mfma_write_vgpr_valu_read # GCN: V_MFMA -# GFX940-NEXT: S_NOP 6 +# GFX942-NEXT: S_NOP 6 # GFX950-NEXT: S_NOP 7 # GCN-NEXT: V_MOV_B32 name: xdl_sgemm16X16X16_mfma_write_vgpr_valu_read @@ -1799,7 +1799,7 @@ body: | ... # GCN-LABEL: name: xdl_sgemm16X16X16_mfma_write_vgpr_dot_read # GCN: V_MFMA -# GFX940-NEXT: S_NOP 6 +# GFX942-NEXT: S_NOP 6 # GFX950-NEXT: S_NOP 7 # GCN-NEXT: V_DOT name: xdl_sgemm16X16X16_mfma_write_vgpr_dot_read @@ -1828,7 +1828,7 @@ body: | ... # GCN-LABEL: name: smfmac16x16x32_mfma_write_agpr_mfma_read_overlap # GCN: V_SMFMAC -# GFX940-NEXT: S_NOP 4 +# GFX942-NEXT: S_NOP 4 # GFX950-NEXT: S_NOP 5 # GCN-NEXT: V_SMFMAC name: smfmac16x16x32_mfma_write_agpr_mfma_read_overlap @@ -1840,7 +1840,7 @@ body: | # GCN-LABEL: name: smfmac32x32x32_mfma_write_agpr_mfma_read_overlap # GCN: V_SMFMAC # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 0 +# GFX942-NEXT: S_NOP 0 # GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_SMFMAC name: smfmac32x32x32_mfma_write_agpr_mfma_read_overlap @@ -1851,7 +1851,7 @@ body: | ... # GCN-LABEL: name: smfmac16x16x32_mfma_write_vgpr_smfmac_read_idx # GCN: V_SMFMAC -# GFX940-NEXT: S_NOP 6 +# GFX942-NEXT: S_NOP 6 # GFX950-NEXT: S_NOP 7 # GCN-NEXT: V_SMFMAC name: smfmac16x16x32_mfma_write_vgpr_smfmac_read_idx @@ -2027,7 +2027,7 @@ body: | ... # GCN-LABEL: name: xdl_sgemm16x16_4pass_mfma_write_agpr_mfma_read_overlap # GCN: V_MFMA -# GFX940-NEXT: S_NOP 4 +# GFX942-NEXT: S_NOP 4 # GFX950-NEXT: S_NOP 5 # GCN-NEXT: V_MFMA name: xdl_sgemm16x16_4pass_mfma_write_agpr_mfma_read_overlap @@ -2038,7 +2038,7 @@ body: | ... # GCN-LABEL: name: smfmac16x16_mfma_write_agpr_mfma_read_overlap # GCN: V_SMFMAC -# GFX940-NEXT: S_NOP 4 +# GFX942-NEXT: S_NOP 4 # GFX950-NEXT: S_NOP 5 # GCN-NEXT: V_MFMA name: smfmac16x16_mfma_write_agpr_mfma_read_overlap @@ -2078,7 +2078,7 @@ body: | ... # GCN-LABEL: name: smfmac16x16_read_vgpr_srcc_valu_write # GCN: V_SMFMAC -# GFX940-NEXT: S_NOP 6 +# GFX942-NEXT: S_NOP 6 # GFX950-NEXT: S_NOP 7 # GCN-NEXT: V_MOV_B32 name: smfmac16x16_read_vgpr_srcc_valu_write @@ -2109,7 +2109,7 @@ body: | # GCN-LABEL: name: smfmac32x32_read_vgpr_srcc_valu_write # GCN: V_SMFMAC # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_MOV_B32 name: smfmac32x32_read_vgpr_srcc_valu_write @@ -2155,7 +2155,7 @@ body: | # 2 pass source # GCN-LABEL: name: xdl_mfma_2pass_write_vgpr_xdl_mfma_read_overlap_srcc # GCN: V_MFMA -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_MFMA name: xdl_mfma_2pass_write_vgpr_xdl_mfma_read_overlap_srcc @@ -2201,7 +2201,7 @@ body: | # 4 pass source # GCN-LABEL: name: xdl_mfma_4pass_write_vgpr_xdl_mfma_read_overlap_srcc # GCN: V_MFMA -# GFX940-NEXT: S_NOP 4 +# GFX942-NEXT: S_NOP 4 # GFX950-NEXT: S_NOP 5 # GCN-NEXT: V_MFMA name: xdl_mfma_4pass_write_vgpr_xdl_mfma_read_overlap_srcc @@ -2216,7 +2216,7 @@ body: | # 4 pass source # GCN-LABEL: name: xdl_mfma_4pass_write_vgpr_xdl_mfma_read_overlap_srca # GCN: V_MFMA -# GFX940-NEXT: S_NOP 6 +# GFX942-NEXT: S_NOP 6 # GFX950-NEXT: S_NOP 7 # GCN-NEXT: V_MFMA name: xdl_mfma_4pass_write_vgpr_xdl_mfma_read_overlap_srca @@ -2231,7 +2231,7 @@ body: | # 4 pass source # GCN-LABEL: name: xdl_mfma_4pass_write_vgpr_xdl_mfma_read_overlap_srcb # GCN: V_MFMA -# GFX940-NEXT: S_NOP 6 +# GFX942-NEXT: S_NOP 6 # GFX950-NEXT: S_NOP 7 # GCN-NEXT: V_MFMA name: xdl_mfma_4pass_write_vgpr_xdl_mfma_read_overlap_srcb @@ -2291,7 +2291,7 @@ body: | # 4 pass source # GCN-LABEL: name: xdl_mfma_4pass_write_vgpr_sgemm_mfma_read_overlap_srcc # GCN: V_MFMA -# GFX940-NEXT: S_NOP 4 +# GFX942-NEXT: S_NOP 4 # GFX950-NEXT: S_NOP 5 # GCN-NEXT: V_MFMA name: xdl_mfma_4pass_write_vgpr_sgemm_mfma_read_overlap_srcc @@ -2306,7 +2306,7 @@ body: | # 4 pass source # GCN-LABEL: name: xdl_mfma_4pass_write_vgpr_sgemm_mfma_read_overlap_srca # GCN: V_MFMA -# GFX940-NEXT: S_NOP 6 +# GFX942-NEXT: S_NOP 6 # GFX950-NEXT: S_NOP 7 # GCN-NEXT: V_MFMA name: xdl_mfma_4pass_write_vgpr_sgemm_mfma_read_overlap_srca @@ -2321,7 +2321,7 @@ body: | # 4 pass source # GCN-LABEL: name: xdl_mfma_4pass_write_vgpr_sgemm_mfma_read_overlap_srcb # GCN: V_MFMA -# GFX940-NEXT: S_NOP 6 +# GFX942-NEXT: S_NOP 6 # GFX950-NEXT: S_NOP 7 # GCN-NEXT: V_MFMA name: xdl_mfma_4pass_write_vgpr_sgemm_mfma_read_overlap_srcb @@ -2337,7 +2337,7 @@ body: | # GCN-LABEL: name: xdl_mfma_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcc # GCN: V_MFMA # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 0 +# GFX942-NEXT: S_NOP 0 # GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_mfma_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcc @@ -2353,7 +2353,7 @@ body: | # GCN-LABEL: name: xdl_mfma_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srca # GCN: V_MFMA # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_MFMA name: xdl_mfma_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srca @@ -2369,7 +2369,7 @@ body: | # GCN-LABEL: name: xdl_mfma_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcb # GCN: V_MFMA # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_MFMA name: xdl_mfma_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcb @@ -2386,7 +2386,7 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 0 +# GFX942-NEXT: S_NOP 0 # GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_16pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcc @@ -2404,7 +2404,7 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_MFMA name: xdl_16pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srca @@ -2421,7 +2421,7 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_MFMA name: xdl_16pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcb @@ -2477,7 +2477,7 @@ body: | # GCN-LABEL: name: xdl_mfma_8pass_write_vgpr_xdl_mfma_read_overlap_srcc # GCN: V_MFMA # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 0 +# GFX942-NEXT: S_NOP 0 # GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_mfma_8pass_write_vgpr_xdl_mfma_read_overlap_srcc @@ -2492,7 +2492,7 @@ body: | # GCN-LABEL: name: xdl_mfma_8pass_write_vgpr_xdl_mfma_read_overlap_srca # GCN: V_MFMA # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_MFMA name: xdl_mfma_8pass_write_vgpr_xdl_mfma_read_overlap_srca @@ -2507,7 +2507,7 @@ body: | # GCN-LABEL: name: xdl_mfma_8pass_write_vgpr_xdl_mfma_read_overlap_srcb # GCN: V_MFMA # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_MFMA name: xdl_mfma_8pass_write_vgpr_xdl_mfma_read_overlap_srcb @@ -2523,7 +2523,7 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 0 +# GFX942-NEXT: S_NOP 0 # GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_MFMA name: xdl_16pass_write_vgpr_xdl_mfma_read_overlap_srcc @@ -2540,7 +2540,7 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_MFMA name: xdl_16pass_write_vgpr_xdl_mfma_read_overlap_srca @@ -2558,7 +2558,7 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_MFMA name: xdl_16pass_write_vgpr_xdl_mfma_read_overlap_srcb @@ -2573,7 +2573,7 @@ body: | # 2 pass source # GCN-LABEL: name: xdl_mfma_2pass_write_agpr_smfmac_read_overlap_srcc # GCN: V_MFMA -# GFX940-NEXT: S_NOP 2 +# GFX942-NEXT: S_NOP 2 # GFX950-NEXT: S_NOP 3 # GCN-NEXT: V_SMFMAC_ name: xdl_mfma_2pass_write_agpr_smfmac_read_overlap_srcc @@ -2588,7 +2588,7 @@ body: | ... # GCN-LABEL: name: xdl_4pass_mfma_write_agpr_smfmac_read_overlap_srcc # GCN: V_MFMA -# GFX940-NEXT: S_NOP 4 +# GFX942-NEXT: S_NOP 4 # GFX950-NEXT: S_NOP 5 # GCN-NEXT: V_SMFMAC_ name: xdl_4pass_mfma_write_agpr_smfmac_read_overlap_srcc @@ -2603,7 +2603,7 @@ body: | # GCN-LABEL: name: xdl_8pass_mfma_write_agpr_smfmac_read_overlap_srcc # GCN: V_MFMA # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 0 +# GFX942-NEXT: S_NOP 0 # GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_SMFMAC_ name: xdl_8pass_mfma_write_agpr_smfmac_read_overlap_srcc @@ -2618,7 +2618,7 @@ body: | # GCN: V_MFMA # GCN-NEXT: S_NOP 7 # GCN-NEXT: S_NOP 7 -# GFX940-NEXT: S_NOP 0 +# GFX942-NEXT: S_NOP 0 # GFX950-NEXT: S_NOP 1 # GCN-NEXT: V_SMFMAC_ name: xdl_16pass_mfma_write_agpr_smfmac_read_overlap_srcc diff --git a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll index 5e74a380e0748..b4682dfb8a26d 100644 --- a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll +++ b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll @@ -5,7 +5,7 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck -check-prefix=GFX12 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX942 %s ; We aren't pressuring the SGPRs, so this can use the add with carry out pre-gfx9. define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 { @@ -189,34 +189,34 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 { ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: scalar_mov_materializes_frame_index_unavailable_scc: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: s_add_i32 s2, s32, 0x4044 -; GFX940-NEXT: scratch_store_dword off, v1, s2 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_add_i32 s0, s32, 64 -; GFX940-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NEXT: s_and_b64 s[0:1], 0, exec -; GFX940-NEXT: s_addc_u32 s0, s32, 0x4040 -; GFX940-NEXT: s_bitcmp1_b32 s0, 0 -; GFX940-NEXT: s_bitset0_b32 s0, 0 -; GFX940-NEXT: v_writelane_b32 v1, s59, 0 -; GFX940-NEXT: s_mov_b32 s59, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use alloca0 v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s59, scc -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s59, v1, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: s_add_i32 s2, s32, 0x4044 -; GFX940-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: s_add_i32 s2, s32, 0x4044 +; GFX942-NEXT: scratch_store_dword off, v1, s2 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_add_i32 s0, s32, 64 +; GFX942-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NEXT: s_and_b64 s[0:1], 0, exec +; GFX942-NEXT: s_addc_u32 s0, s32, 0x4040 +; GFX942-NEXT: s_bitcmp1_b32 s0, 0 +; GFX942-NEXT: s_bitset0_b32 s0, 0 +; GFX942-NEXT: v_writelane_b32 v1, s59, 0 +; GFX942-NEXT: s_mov_b32 s59, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use alloca0 v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s59, scc +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s59, v1, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: s_add_i32 s2, s32, 0x4044 +; GFX942-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %alloca0 = alloca [4096 x i32], align 64, addrspace(5) %alloca1 = alloca i32, align 4, addrspace(5) call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0) @@ -390,31 +390,31 @@ define void @scalar_mov_materializes_frame_index_dead_scc() #0 { ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: scalar_mov_materializes_frame_index_dead_scc: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: s_add_i32 s2, s32, 0x4044 -; GFX940-NEXT: scratch_store_dword off, v1, s2 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_add_i32 s0, s32, 64 -; GFX940-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NEXT: s_add_i32 s0, s32, 0x4040 -; GFX940-NEXT: v_writelane_b32 v1, s59, 0 -; GFX940-NEXT: s_mov_b32 s59, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use alloca0 v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s59 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s59, v1, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: s_add_i32 s2, s32, 0x4044 -; GFX940-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: scalar_mov_materializes_frame_index_dead_scc: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: s_add_i32 s2, s32, 0x4044 +; GFX942-NEXT: scratch_store_dword off, v1, s2 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_add_i32 s0, s32, 64 +; GFX942-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NEXT: s_add_i32 s0, s32, 0x4040 +; GFX942-NEXT: v_writelane_b32 v1, s59, 0 +; GFX942-NEXT: s_mov_b32 s59, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use alloca0 v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s59 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s59, v1, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: s_add_i32 s2, s32, 0x4044 +; GFX942-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %alloca0 = alloca [4096 x i32], align 64, addrspace(5) %alloca1 = alloca i32, align 4, addrspace(5) call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0) @@ -632,39 +632,39 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, s33 -; GFX940-NEXT: s_mov_b32 s33, s32 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: s_add_i32 s3, s33, 0x4044 -; GFX940-NEXT: scratch_store_dword off, v1, s3 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_addk_i32 s32, 0x4080 -; GFX940-NEXT: s_add_i32 s0, s33, 64 -; GFX940-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NEXT: s_and_b64 s[0:1], 0, exec -; GFX940-NEXT: s_addc_u32 s0, s33, 0x4040 -; GFX940-NEXT: s_bitcmp1_b32 s0, 0 -; GFX940-NEXT: s_bitset0_b32 s0, 0 -; GFX940-NEXT: v_writelane_b32 v1, s59, 0 -; GFX940-NEXT: s_mov_b32 s59, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use alloca0 v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s59, scc -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s59, v1, 0 -; GFX940-NEXT: s_mov_b32 s32, s33 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: s_add_i32 s3, s33, 0x4044 -; GFX940-NEXT: scratch_load_dword v1, off, s3 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_mov_b32 s33, s2 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, s33 +; GFX942-NEXT: s_mov_b32 s33, s32 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: s_add_i32 s3, s33, 0x4044 +; GFX942-NEXT: scratch_store_dword off, v1, s3 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_addk_i32 s32, 0x4080 +; GFX942-NEXT: s_add_i32 s0, s33, 64 +; GFX942-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NEXT: s_and_b64 s[0:1], 0, exec +; GFX942-NEXT: s_addc_u32 s0, s33, 0x4040 +; GFX942-NEXT: s_bitcmp1_b32 s0, 0 +; GFX942-NEXT: s_bitset0_b32 s0, 0 +; GFX942-NEXT: v_writelane_b32 v1, s59, 0 +; GFX942-NEXT: s_mov_b32 s59, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use alloca0 v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s59, scc +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s59, v1, 0 +; GFX942-NEXT: s_mov_b32 s32, s33 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: s_add_i32 s3, s33, 0x4044 +; GFX942-NEXT: scratch_load_dword v1, off, s3 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_mov_b32 s33, s2 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %alloca0 = alloca [4096 x i32], align 64, addrspace(5) %alloca1 = alloca i32, align 4, addrspace(5) call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0) @@ -820,29 +820,29 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset() ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: s_add_i32 s2, s32, 0x4040 -; GFX940-NEXT: scratch_store_dword off, v0, s2 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_and_b64 s[0:1], 0, exec -; GFX940-NEXT: s_addc_u32 s0, s32, 64 -; GFX940-NEXT: s_bitcmp1_b32 s0, 0 -; GFX940-NEXT: s_bitset0_b32 s0, 0 -; GFX940-NEXT: v_writelane_b32 v0, s59, 0 -; GFX940-NEXT: s_mov_b32 s59, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s59, scc -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s59, v0, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: s_add_i32 s2, s32, 0x4040 -; GFX940-NEXT: scratch_load_dword v0, off, s2 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: s_add_i32 s2, s32, 0x4040 +; GFX942-NEXT: scratch_store_dword off, v0, s2 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_and_b64 s[0:1], 0, exec +; GFX942-NEXT: s_addc_u32 s0, s32, 64 +; GFX942-NEXT: s_bitcmp1_b32 s0, 0 +; GFX942-NEXT: s_bitset0_b32 s0, 0 +; GFX942-NEXT: v_writelane_b32 v0, s59, 0 +; GFX942-NEXT: s_mov_b32 s59, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s59, scc +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s59, v0, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: s_add_i32 s2, s32, 0x4040 +; GFX942-NEXT: scratch_load_dword v0, off, s2 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %alloca0 = alloca [4096 x i32], align 64, addrspace(5) call void asm sideeffect "; use $0, $1", "{s59},{scc}"(ptr addrspace(5) %alloca0, i32 0) ret void @@ -982,26 +982,26 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset() #0 ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: s_add_i32 s2, s32, 0x4040 -; GFX940-NEXT: scratch_store_dword off, v0, s2 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_add_i32 s0, s32, 64 -; GFX940-NEXT: v_writelane_b32 v0, s59, 0 -; GFX940-NEXT: s_mov_b32 s59, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s59 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s59, v0, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: s_add_i32 s2, s32, 0x4040 -; GFX940-NEXT: scratch_load_dword v0, off, s2 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: s_add_i32 s2, s32, 0x4040 +; GFX942-NEXT: scratch_store_dword off, v0, s2 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_add_i32 s0, s32, 64 +; GFX942-NEXT: v_writelane_b32 v0, s59, 0 +; GFX942-NEXT: s_mov_b32 s59, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s59 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s59, v0, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: s_add_i32 s2, s32, 0x4040 +; GFX942-NEXT: scratch_load_dword v0, off, s2 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %alloca0 = alloca [4096 x i32], align 64, addrspace(5) call void asm sideeffect "; use $0", "{s59}"(ptr addrspace(5) %alloca0) ret void @@ -1184,34 +1184,34 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, s33 -; GFX940-NEXT: s_mov_b32 s33, s32 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: s_add_i32 s3, s33, 0x4040 -; GFX940-NEXT: scratch_store_dword off, v0, s3 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_addk_i32 s32, 0x4080 -; GFX940-NEXT: s_and_b64 s[0:1], 0, exec -; GFX940-NEXT: s_addc_u32 s0, s33, 64 -; GFX940-NEXT: s_bitcmp1_b32 s0, 0 -; GFX940-NEXT: s_bitset0_b32 s0, 0 -; GFX940-NEXT: v_writelane_b32 v0, s59, 0 -; GFX940-NEXT: s_mov_b32 s59, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s59, scc -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s59, v0, 0 -; GFX940-NEXT: s_mov_b32 s32, s33 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: s_add_i32 s3, s33, 0x4040 -; GFX940-NEXT: scratch_load_dword v0, off, s3 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_mov_b32 s33, s2 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, s33 +; GFX942-NEXT: s_mov_b32 s33, s32 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: s_add_i32 s3, s33, 0x4040 +; GFX942-NEXT: scratch_store_dword off, v0, s3 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_addk_i32 s32, 0x4080 +; GFX942-NEXT: s_and_b64 s[0:1], 0, exec +; GFX942-NEXT: s_addc_u32 s0, s33, 64 +; GFX942-NEXT: s_bitcmp1_b32 s0, 0 +; GFX942-NEXT: s_bitset0_b32 s0, 0 +; GFX942-NEXT: v_writelane_b32 v0, s59, 0 +; GFX942-NEXT: s_mov_b32 s59, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s59, scc +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s59, v0, 0 +; GFX942-NEXT: s_mov_b32 s32, s33 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: s_add_i32 s3, s33, 0x4040 +; GFX942-NEXT: scratch_load_dword v0, off, s3 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_mov_b32 s33, s2 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %alloca0 = alloca [4096 x i32], align 64, addrspace(5) call void asm sideeffect "; use $0, $1", "{s59},{scc}"(ptr addrspace(5) %alloca0, i32 0) ret void @@ -1380,31 +1380,31 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp() ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s0, s33 -; GFX940-NEXT: s_mov_b32 s33, s32 -; GFX940-NEXT: s_xor_saveexec_b64 s[2:3], -1 -; GFX940-NEXT: s_add_i32 s1, s33, 0x4040 -; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[2:3] -; GFX940-NEXT: s_addk_i32 s32, 0x4080 -; GFX940-NEXT: s_add_i32 s1, s33, 64 -; GFX940-NEXT: v_writelane_b32 v0, s59, 0 -; GFX940-NEXT: s_mov_b32 s59, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s59 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s59, v0, 0 -; GFX940-NEXT: s_mov_b32 s32, s33 -; GFX940-NEXT: s_xor_saveexec_b64 s[2:3], -1 -; GFX940-NEXT: s_add_i32 s1, s33, 0x4040 -; GFX940-NEXT: scratch_load_dword v0, off, s1 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[2:3] -; GFX940-NEXT: s_mov_b32 s33, s0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s0, s33 +; GFX942-NEXT: s_mov_b32 s33, s32 +; GFX942-NEXT: s_xor_saveexec_b64 s[2:3], -1 +; GFX942-NEXT: s_add_i32 s1, s33, 0x4040 +; GFX942-NEXT: scratch_store_dword off, v0, s1 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[2:3] +; GFX942-NEXT: s_addk_i32 s32, 0x4080 +; GFX942-NEXT: s_add_i32 s1, s33, 64 +; GFX942-NEXT: v_writelane_b32 v0, s59, 0 +; GFX942-NEXT: s_mov_b32 s59, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s59 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s59, v0, 0 +; GFX942-NEXT: s_mov_b32 s32, s33 +; GFX942-NEXT: s_xor_saveexec_b64 s[2:3], -1 +; GFX942-NEXT: s_add_i32 s1, s33, 0x4040 +; GFX942-NEXT: scratch_load_dword v0, off, s1 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[2:3] +; GFX942-NEXT: s_mov_b32 s33, s0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %alloca0 = alloca [4096 x i32], align 64, addrspace(5) call void asm sideeffect "; use $0", "{s59}"(ptr addrspace(5) %alloca0) ret void @@ -1576,31 +1576,31 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset( ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: s_add_i32 s2, s32, 0x8040 -; GFX940-NEXT: scratch_store_dword off, v1, s2 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: v_writelane_b32 v1, s59, 0 -; GFX940-NEXT: s_add_i32 s59, s32, 0x442c -; GFX940-NEXT: s_add_i32 s0, s32, 64 -; GFX940-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use alloca0 v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_and_b64 s[0:1], 0, exec -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s59, scc -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s59, v1, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: s_add_i32 s2, s32, 0x8040 -; GFX940-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: s_add_i32 s2, s32, 0x8040 +; GFX942-NEXT: scratch_store_dword off, v1, s2 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v1, s59, 0 +; GFX942-NEXT: s_add_i32 s59, s32, 0x442c +; GFX942-NEXT: s_add_i32 s0, s32, 64 +; GFX942-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use alloca0 v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_and_b64 s[0:1], 0, exec +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s59, scc +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s59, v1, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: s_add_i32 s2, s32, 0x8040 +; GFX942-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %alloca0 = alloca [4096 x i32], align 64, addrspace(5) %alloca1 = alloca [4096 x i32], align 4, addrspace(5) %alloca1.offset = getelementptr [4096 x i32], ptr addrspace(5) %alloca1, i32 0, i32 251 @@ -1789,33 +1789,33 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[2:3], -1 -; GFX940-NEXT: s_add_i32 s1, s32, 0x8040 -; GFX940-NEXT: scratch_store_dword off, v1, s1 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[2:3] -; GFX940-NEXT: s_lshl_b32 s0, s0, 2 -; GFX940-NEXT: v_writelane_b32 v1, s59, 0 -; GFX940-NEXT: s_add_i32 s59, s32, s0 -; GFX940-NEXT: s_addk_i32 s59, 0x4040 -; GFX940-NEXT: s_add_i32 s0, s32, 64 -; GFX940-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use alloca0 v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_and_b64 s[0:1], 0, exec -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s59, scc -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s59, v1, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: s_add_i32 s2, s32, 0x8040 -; GFX940-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[2:3], -1 +; GFX942-NEXT: s_add_i32 s1, s32, 0x8040 +; GFX942-NEXT: scratch_store_dword off, v1, s1 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[2:3] +; GFX942-NEXT: s_lshl_b32 s0, s0, 2 +; GFX942-NEXT: v_writelane_b32 v1, s59, 0 +; GFX942-NEXT: s_add_i32 s59, s32, s0 +; GFX942-NEXT: s_addk_i32 s59, 0x4040 +; GFX942-NEXT: s_add_i32 s0, s32, 64 +; GFX942-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use alloca0 v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_and_b64 s[0:1], 0, exec +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s59, scc +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s59, v1, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: s_add_i32 s2, s32, 0x8040 +; GFX942-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %alloca0 = alloca [4096 x i32], align 64, addrspace(5) %alloca1 = alloca [4096 x i32], align 4, addrspace(5) %alloca1.offset = getelementptr [4096 x i32], ptr addrspace(5) %alloca1, i32 0, i32 %soffset diff --git a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll index a2f7d24f9ec87..2420393b63ba9 100644 --- a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll +++ b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll @@ -2,7 +2,7 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefix=GFX7 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -mattr=+xnack < %s | FileCheck -check-prefix=GFX8 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+xnack < %s | FileCheck -check-prefixes=GFX900 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=+xnack < %s | FileCheck -check-prefixes=GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -mattr=+xnack < %s | FileCheck -check-prefixes=GFX942 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10_1 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX10_3 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s @@ -302,97 +302,97 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: s_add_i32 s2, s32, 0x4044 -; GFX940-NEXT: scratch_store_dword off, v23, s2 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: v_writelane_b32 v23, s30, 0 -; GFX940-NEXT: v_writelane_b32 v23, s31, 1 -; GFX940-NEXT: v_writelane_b32 v23, s33, 2 -; GFX940-NEXT: v_writelane_b32 v23, s34, 3 -; GFX940-NEXT: v_writelane_b32 v23, s35, 4 -; GFX940-NEXT: v_writelane_b32 v23, s36, 5 -; GFX940-NEXT: v_writelane_b32 v23, s37, 6 -; GFX940-NEXT: v_writelane_b32 v23, s38, 7 -; GFX940-NEXT: v_writelane_b32 v23, s39, 8 -; GFX940-NEXT: v_writelane_b32 v23, s40, 9 -; GFX940-NEXT: v_writelane_b32 v23, s41, 10 -; GFX940-NEXT: v_writelane_b32 v23, s42, 11 -; GFX940-NEXT: v_writelane_b32 v23, s43, 12 -; GFX940-NEXT: v_writelane_b32 v23, s44, 13 -; GFX940-NEXT: v_writelane_b32 v23, s45, 14 -; GFX940-NEXT: v_writelane_b32 v23, s46, 15 -; GFX940-NEXT: v_writelane_b32 v23, s47, 16 -; GFX940-NEXT: v_writelane_b32 v23, s48, 17 -; GFX940-NEXT: v_writelane_b32 v23, s49, 18 -; GFX940-NEXT: v_writelane_b32 v23, s50, 19 -; GFX940-NEXT: v_writelane_b32 v23, s51, 20 -; GFX940-NEXT: v_writelane_b32 v23, s52, 21 -; GFX940-NEXT: v_writelane_b32 v23, s53, 22 -; GFX940-NEXT: v_writelane_b32 v23, s54, 23 -; GFX940-NEXT: v_writelane_b32 v23, s55, 24 -; GFX940-NEXT: v_writelane_b32 v23, s56, 25 -; GFX940-NEXT: v_writelane_b32 v23, s57, 26 -; GFX940-NEXT: v_writelane_b32 v23, s58, 27 -; GFX940-NEXT: v_writelane_b32 v23, s59, 28 -; GFX940-NEXT: v_writelane_b32 v23, s60, 29 -; GFX940-NEXT: s_add_i32 s0, s32, 64 -; GFX940-NEXT: v_writelane_b32 v23, s61, 30 -; GFX940-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NEXT: s_and_b64 s[60:61], 0, exec -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use alloca0 v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_addc_u32 s60, s32, 0x4040 -; GFX940-NEXT: s_bitcmp1_b32 s60, 0 -; GFX940-NEXT: s_bitset0_b32 s60, 0 -; GFX940-NEXT: s_mov_b32 s59, s60 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s61, v23, 30 -; GFX940-NEXT: v_readlane_b32 s60, v23, 29 -; GFX940-NEXT: v_readlane_b32 s59, v23, 28 -; GFX940-NEXT: v_readlane_b32 s58, v23, 27 -; GFX940-NEXT: v_readlane_b32 s57, v23, 26 -; GFX940-NEXT: v_readlane_b32 s56, v23, 25 -; GFX940-NEXT: v_readlane_b32 s55, v23, 24 -; GFX940-NEXT: v_readlane_b32 s54, v23, 23 -; GFX940-NEXT: v_readlane_b32 s53, v23, 22 -; GFX940-NEXT: v_readlane_b32 s52, v23, 21 -; GFX940-NEXT: v_readlane_b32 s51, v23, 20 -; GFX940-NEXT: v_readlane_b32 s50, v23, 19 -; GFX940-NEXT: v_readlane_b32 s49, v23, 18 -; GFX940-NEXT: v_readlane_b32 s48, v23, 17 -; GFX940-NEXT: v_readlane_b32 s47, v23, 16 -; GFX940-NEXT: v_readlane_b32 s46, v23, 15 -; GFX940-NEXT: v_readlane_b32 s45, v23, 14 -; GFX940-NEXT: v_readlane_b32 s44, v23, 13 -; GFX940-NEXT: v_readlane_b32 s43, v23, 12 -; GFX940-NEXT: v_readlane_b32 s42, v23, 11 -; GFX940-NEXT: v_readlane_b32 s41, v23, 10 -; GFX940-NEXT: v_readlane_b32 s40, v23, 9 -; GFX940-NEXT: v_readlane_b32 s39, v23, 8 -; GFX940-NEXT: v_readlane_b32 s38, v23, 7 -; GFX940-NEXT: v_readlane_b32 s37, v23, 6 -; GFX940-NEXT: v_readlane_b32 s36, v23, 5 -; GFX940-NEXT: v_readlane_b32 s35, v23, 4 -; GFX940-NEXT: v_readlane_b32 s34, v23, 3 -; GFX940-NEXT: v_readlane_b32 s33, v23, 2 -; GFX940-NEXT: v_readlane_b32 s31, v23, 1 -; GFX940-NEXT: v_readlane_b32 s30, v23, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: s_add_i32 s2, s32, 0x4044 -; GFX940-NEXT: scratch_load_dword v23, off, s2 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: s_add_i32 s2, s32, 0x4044 +; GFX942-NEXT: scratch_store_dword off, v23, s2 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v23, s30, 0 +; GFX942-NEXT: v_writelane_b32 v23, s31, 1 +; GFX942-NEXT: v_writelane_b32 v23, s33, 2 +; GFX942-NEXT: v_writelane_b32 v23, s34, 3 +; GFX942-NEXT: v_writelane_b32 v23, s35, 4 +; GFX942-NEXT: v_writelane_b32 v23, s36, 5 +; GFX942-NEXT: v_writelane_b32 v23, s37, 6 +; GFX942-NEXT: v_writelane_b32 v23, s38, 7 +; GFX942-NEXT: v_writelane_b32 v23, s39, 8 +; GFX942-NEXT: v_writelane_b32 v23, s40, 9 +; GFX942-NEXT: v_writelane_b32 v23, s41, 10 +; GFX942-NEXT: v_writelane_b32 v23, s42, 11 +; GFX942-NEXT: v_writelane_b32 v23, s43, 12 +; GFX942-NEXT: v_writelane_b32 v23, s44, 13 +; GFX942-NEXT: v_writelane_b32 v23, s45, 14 +; GFX942-NEXT: v_writelane_b32 v23, s46, 15 +; GFX942-NEXT: v_writelane_b32 v23, s47, 16 +; GFX942-NEXT: v_writelane_b32 v23, s48, 17 +; GFX942-NEXT: v_writelane_b32 v23, s49, 18 +; GFX942-NEXT: v_writelane_b32 v23, s50, 19 +; GFX942-NEXT: v_writelane_b32 v23, s51, 20 +; GFX942-NEXT: v_writelane_b32 v23, s52, 21 +; GFX942-NEXT: v_writelane_b32 v23, s53, 22 +; GFX942-NEXT: v_writelane_b32 v23, s54, 23 +; GFX942-NEXT: v_writelane_b32 v23, s55, 24 +; GFX942-NEXT: v_writelane_b32 v23, s56, 25 +; GFX942-NEXT: v_writelane_b32 v23, s57, 26 +; GFX942-NEXT: v_writelane_b32 v23, s58, 27 +; GFX942-NEXT: v_writelane_b32 v23, s59, 28 +; GFX942-NEXT: v_writelane_b32 v23, s60, 29 +; GFX942-NEXT: s_add_i32 s0, s32, 64 +; GFX942-NEXT: v_writelane_b32 v23, s61, 30 +; GFX942-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NEXT: s_and_b64 s[60:61], 0, exec +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use alloca0 v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_addc_u32 s60, s32, 0x4040 +; GFX942-NEXT: s_bitcmp1_b32 s60, 0 +; GFX942-NEXT: s_bitset0_b32 s60, 0 +; GFX942-NEXT: s_mov_b32 s59, s60 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s61, v23, 30 +; GFX942-NEXT: v_readlane_b32 s60, v23, 29 +; GFX942-NEXT: v_readlane_b32 s59, v23, 28 +; GFX942-NEXT: v_readlane_b32 s58, v23, 27 +; GFX942-NEXT: v_readlane_b32 s57, v23, 26 +; GFX942-NEXT: v_readlane_b32 s56, v23, 25 +; GFX942-NEXT: v_readlane_b32 s55, v23, 24 +; GFX942-NEXT: v_readlane_b32 s54, v23, 23 +; GFX942-NEXT: v_readlane_b32 s53, v23, 22 +; GFX942-NEXT: v_readlane_b32 s52, v23, 21 +; GFX942-NEXT: v_readlane_b32 s51, v23, 20 +; GFX942-NEXT: v_readlane_b32 s50, v23, 19 +; GFX942-NEXT: v_readlane_b32 s49, v23, 18 +; GFX942-NEXT: v_readlane_b32 s48, v23, 17 +; GFX942-NEXT: v_readlane_b32 s47, v23, 16 +; GFX942-NEXT: v_readlane_b32 s46, v23, 15 +; GFX942-NEXT: v_readlane_b32 s45, v23, 14 +; GFX942-NEXT: v_readlane_b32 s44, v23, 13 +; GFX942-NEXT: v_readlane_b32 s43, v23, 12 +; GFX942-NEXT: v_readlane_b32 s42, v23, 11 +; GFX942-NEXT: v_readlane_b32 s41, v23, 10 +; GFX942-NEXT: v_readlane_b32 s40, v23, 9 +; GFX942-NEXT: v_readlane_b32 s39, v23, 8 +; GFX942-NEXT: v_readlane_b32 s38, v23, 7 +; GFX942-NEXT: v_readlane_b32 s37, v23, 6 +; GFX942-NEXT: v_readlane_b32 s36, v23, 5 +; GFX942-NEXT: v_readlane_b32 s35, v23, 4 +; GFX942-NEXT: v_readlane_b32 s34, v23, 3 +; GFX942-NEXT: v_readlane_b32 s33, v23, 2 +; GFX942-NEXT: v_readlane_b32 s31, v23, 1 +; GFX942-NEXT: v_readlane_b32 s30, v23, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: s_add_i32 s2, s32, 0x4044 +; GFX942-NEXT: scratch_load_dword v23, off, s2 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10_1-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs: ; GFX10_1: ; %bb.0: @@ -1049,92 +1049,92 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: s_add_i32 s2, s32, 0x4010 -; GFX940-NEXT: scratch_store_dword off, v21, s2 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: v_writelane_b32 v21, s30, 0 -; GFX940-NEXT: v_writelane_b32 v21, s31, 1 -; GFX940-NEXT: v_writelane_b32 v21, s33, 2 -; GFX940-NEXT: v_writelane_b32 v21, s34, 3 -; GFX940-NEXT: v_writelane_b32 v21, s35, 4 -; GFX940-NEXT: v_writelane_b32 v21, s36, 5 -; GFX940-NEXT: v_writelane_b32 v21, s37, 6 -; GFX940-NEXT: v_writelane_b32 v21, s38, 7 -; GFX940-NEXT: v_writelane_b32 v21, s39, 8 -; GFX940-NEXT: v_writelane_b32 v21, s40, 9 -; GFX940-NEXT: v_writelane_b32 v21, s41, 10 -; GFX940-NEXT: v_writelane_b32 v21, s42, 11 -; GFX940-NEXT: v_writelane_b32 v21, s43, 12 -; GFX940-NEXT: v_writelane_b32 v21, s44, 13 -; GFX940-NEXT: v_writelane_b32 v21, s45, 14 -; GFX940-NEXT: v_writelane_b32 v21, s46, 15 -; GFX940-NEXT: v_writelane_b32 v21, s47, 16 -; GFX940-NEXT: v_writelane_b32 v21, s48, 17 -; GFX940-NEXT: v_writelane_b32 v21, s49, 18 -; GFX940-NEXT: v_writelane_b32 v21, s50, 19 -; GFX940-NEXT: v_writelane_b32 v21, s51, 20 -; GFX940-NEXT: v_writelane_b32 v21, s52, 21 -; GFX940-NEXT: v_writelane_b32 v21, s53, 22 -; GFX940-NEXT: v_writelane_b32 v21, s54, 23 -; GFX940-NEXT: v_writelane_b32 v21, s55, 24 -; GFX940-NEXT: v_writelane_b32 v21, s56, 25 -; GFX940-NEXT: v_writelane_b32 v21, s57, 26 -; GFX940-NEXT: v_writelane_b32 v21, s58, 27 -; GFX940-NEXT: v_writelane_b32 v21, s59, 28 -; GFX940-NEXT: v_writelane_b32 v21, s60, 29 -; GFX940-NEXT: v_writelane_b32 v21, s61, 30 -; GFX940-NEXT: s_and_b64 s[60:61], 0, exec -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_addc_u32 s60, s32, 16 -; GFX940-NEXT: s_bitcmp1_b32 s60, 0 -; GFX940-NEXT: s_bitset0_b32 s60, 0 -; GFX940-NEXT: s_mov_b32 s59, s60 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s61, v21, 30 -; GFX940-NEXT: v_readlane_b32 s60, v21, 29 -; GFX940-NEXT: v_readlane_b32 s59, v21, 28 -; GFX940-NEXT: v_readlane_b32 s58, v21, 27 -; GFX940-NEXT: v_readlane_b32 s57, v21, 26 -; GFX940-NEXT: v_readlane_b32 s56, v21, 25 -; GFX940-NEXT: v_readlane_b32 s55, v21, 24 -; GFX940-NEXT: v_readlane_b32 s54, v21, 23 -; GFX940-NEXT: v_readlane_b32 s53, v21, 22 -; GFX940-NEXT: v_readlane_b32 s52, v21, 21 -; GFX940-NEXT: v_readlane_b32 s51, v21, 20 -; GFX940-NEXT: v_readlane_b32 s50, v21, 19 -; GFX940-NEXT: v_readlane_b32 s49, v21, 18 -; GFX940-NEXT: v_readlane_b32 s48, v21, 17 -; GFX940-NEXT: v_readlane_b32 s47, v21, 16 -; GFX940-NEXT: v_readlane_b32 s46, v21, 15 -; GFX940-NEXT: v_readlane_b32 s45, v21, 14 -; GFX940-NEXT: v_readlane_b32 s44, v21, 13 -; GFX940-NEXT: v_readlane_b32 s43, v21, 12 -; GFX940-NEXT: v_readlane_b32 s42, v21, 11 -; GFX940-NEXT: v_readlane_b32 s41, v21, 10 -; GFX940-NEXT: v_readlane_b32 s40, v21, 9 -; GFX940-NEXT: v_readlane_b32 s39, v21, 8 -; GFX940-NEXT: v_readlane_b32 s38, v21, 7 -; GFX940-NEXT: v_readlane_b32 s37, v21, 6 -; GFX940-NEXT: v_readlane_b32 s36, v21, 5 -; GFX940-NEXT: v_readlane_b32 s35, v21, 4 -; GFX940-NEXT: v_readlane_b32 s34, v21, 3 -; GFX940-NEXT: v_readlane_b32 s33, v21, 2 -; GFX940-NEXT: v_readlane_b32 s31, v21, 1 -; GFX940-NEXT: v_readlane_b32 s30, v21, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: s_add_i32 s2, s32, 0x4010 -; GFX940-NEXT: scratch_load_dword v21, off, s2 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: s_add_i32 s2, s32, 0x4010 +; GFX942-NEXT: scratch_store_dword off, v21, s2 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v21, s30, 0 +; GFX942-NEXT: v_writelane_b32 v21, s31, 1 +; GFX942-NEXT: v_writelane_b32 v21, s33, 2 +; GFX942-NEXT: v_writelane_b32 v21, s34, 3 +; GFX942-NEXT: v_writelane_b32 v21, s35, 4 +; GFX942-NEXT: v_writelane_b32 v21, s36, 5 +; GFX942-NEXT: v_writelane_b32 v21, s37, 6 +; GFX942-NEXT: v_writelane_b32 v21, s38, 7 +; GFX942-NEXT: v_writelane_b32 v21, s39, 8 +; GFX942-NEXT: v_writelane_b32 v21, s40, 9 +; GFX942-NEXT: v_writelane_b32 v21, s41, 10 +; GFX942-NEXT: v_writelane_b32 v21, s42, 11 +; GFX942-NEXT: v_writelane_b32 v21, s43, 12 +; GFX942-NEXT: v_writelane_b32 v21, s44, 13 +; GFX942-NEXT: v_writelane_b32 v21, s45, 14 +; GFX942-NEXT: v_writelane_b32 v21, s46, 15 +; GFX942-NEXT: v_writelane_b32 v21, s47, 16 +; GFX942-NEXT: v_writelane_b32 v21, s48, 17 +; GFX942-NEXT: v_writelane_b32 v21, s49, 18 +; GFX942-NEXT: v_writelane_b32 v21, s50, 19 +; GFX942-NEXT: v_writelane_b32 v21, s51, 20 +; GFX942-NEXT: v_writelane_b32 v21, s52, 21 +; GFX942-NEXT: v_writelane_b32 v21, s53, 22 +; GFX942-NEXT: v_writelane_b32 v21, s54, 23 +; GFX942-NEXT: v_writelane_b32 v21, s55, 24 +; GFX942-NEXT: v_writelane_b32 v21, s56, 25 +; GFX942-NEXT: v_writelane_b32 v21, s57, 26 +; GFX942-NEXT: v_writelane_b32 v21, s58, 27 +; GFX942-NEXT: v_writelane_b32 v21, s59, 28 +; GFX942-NEXT: v_writelane_b32 v21, s60, 29 +; GFX942-NEXT: v_writelane_b32 v21, s61, 30 +; GFX942-NEXT: s_and_b64 s[60:61], 0, exec +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_addc_u32 s60, s32, 16 +; GFX942-NEXT: s_bitcmp1_b32 s60, 0 +; GFX942-NEXT: s_bitset0_b32 s60, 0 +; GFX942-NEXT: s_mov_b32 s59, s60 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s61, v21, 30 +; GFX942-NEXT: v_readlane_b32 s60, v21, 29 +; GFX942-NEXT: v_readlane_b32 s59, v21, 28 +; GFX942-NEXT: v_readlane_b32 s58, v21, 27 +; GFX942-NEXT: v_readlane_b32 s57, v21, 26 +; GFX942-NEXT: v_readlane_b32 s56, v21, 25 +; GFX942-NEXT: v_readlane_b32 s55, v21, 24 +; GFX942-NEXT: v_readlane_b32 s54, v21, 23 +; GFX942-NEXT: v_readlane_b32 s53, v21, 22 +; GFX942-NEXT: v_readlane_b32 s52, v21, 21 +; GFX942-NEXT: v_readlane_b32 s51, v21, 20 +; GFX942-NEXT: v_readlane_b32 s50, v21, 19 +; GFX942-NEXT: v_readlane_b32 s49, v21, 18 +; GFX942-NEXT: v_readlane_b32 s48, v21, 17 +; GFX942-NEXT: v_readlane_b32 s47, v21, 16 +; GFX942-NEXT: v_readlane_b32 s46, v21, 15 +; GFX942-NEXT: v_readlane_b32 s45, v21, 14 +; GFX942-NEXT: v_readlane_b32 s44, v21, 13 +; GFX942-NEXT: v_readlane_b32 s43, v21, 12 +; GFX942-NEXT: v_readlane_b32 s42, v21, 11 +; GFX942-NEXT: v_readlane_b32 s41, v21, 10 +; GFX942-NEXT: v_readlane_b32 s40, v21, 9 +; GFX942-NEXT: v_readlane_b32 s39, v21, 8 +; GFX942-NEXT: v_readlane_b32 s38, v21, 7 +; GFX942-NEXT: v_readlane_b32 s37, v21, 6 +; GFX942-NEXT: v_readlane_b32 s36, v21, 5 +; GFX942-NEXT: v_readlane_b32 s35, v21, 4 +; GFX942-NEXT: v_readlane_b32 s34, v21, 3 +; GFX942-NEXT: v_readlane_b32 s33, v21, 2 +; GFX942-NEXT: v_readlane_b32 s31, v21, 1 +; GFX942-NEXT: v_readlane_b32 s30, v21, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: s_add_i32 s2, s32, 0x4010 +; GFX942-NEXT: scratch_load_dword v21, off, s2 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10_1-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset: ; GFX10_1: ; %bb.0: @@ -1777,92 +1777,92 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: s_add_i32 s2, s32, 0x8040 -; GFX940-NEXT: scratch_store_dword off, v22, s2 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: v_writelane_b32 v22, s30, 0 -; GFX940-NEXT: v_writelane_b32 v22, s31, 1 -; GFX940-NEXT: v_writelane_b32 v22, s33, 2 -; GFX940-NEXT: v_writelane_b32 v22, s34, 3 -; GFX940-NEXT: v_writelane_b32 v22, s35, 4 -; GFX940-NEXT: v_writelane_b32 v22, s36, 5 -; GFX940-NEXT: v_writelane_b32 v22, s37, 6 -; GFX940-NEXT: v_writelane_b32 v22, s38, 7 -; GFX940-NEXT: v_writelane_b32 v22, s39, 8 -; GFX940-NEXT: v_writelane_b32 v22, s40, 9 -; GFX940-NEXT: v_writelane_b32 v22, s41, 10 -; GFX940-NEXT: v_writelane_b32 v22, s42, 11 -; GFX940-NEXT: v_writelane_b32 v22, s43, 12 -; GFX940-NEXT: v_writelane_b32 v22, s44, 13 -; GFX940-NEXT: v_writelane_b32 v22, s45, 14 -; GFX940-NEXT: v_writelane_b32 v22, s46, 15 -; GFX940-NEXT: v_writelane_b32 v22, s47, 16 -; GFX940-NEXT: v_writelane_b32 v22, s48, 17 -; GFX940-NEXT: v_writelane_b32 v22, s49, 18 -; GFX940-NEXT: v_writelane_b32 v22, s50, 19 -; GFX940-NEXT: v_writelane_b32 v22, s51, 20 -; GFX940-NEXT: v_writelane_b32 v22, s52, 21 -; GFX940-NEXT: v_writelane_b32 v22, s53, 22 -; GFX940-NEXT: v_writelane_b32 v22, s54, 23 -; GFX940-NEXT: v_writelane_b32 v22, s55, 24 -; GFX940-NEXT: v_writelane_b32 v22, s56, 25 -; GFX940-NEXT: v_writelane_b32 v22, s57, 26 -; GFX940-NEXT: s_add_i32 s0, s32, 64 -; GFX940-NEXT: v_writelane_b32 v22, s59, 27 -; GFX940-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NEXT: v_writelane_b32 v22, s60, 28 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use alloca0 v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_add_i32 s59, s32, 0x4240 -; GFX940-NEXT: v_writelane_b32 v22, s61, 29 -; GFX940-NEXT: s_and_b64 s[60:61], 0, exec -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s61, v22, 29 -; GFX940-NEXT: v_readlane_b32 s60, v22, 28 -; GFX940-NEXT: v_readlane_b32 s59, v22, 27 -; GFX940-NEXT: v_readlane_b32 s57, v22, 26 -; GFX940-NEXT: v_readlane_b32 s56, v22, 25 -; GFX940-NEXT: v_readlane_b32 s55, v22, 24 -; GFX940-NEXT: v_readlane_b32 s54, v22, 23 -; GFX940-NEXT: v_readlane_b32 s53, v22, 22 -; GFX940-NEXT: v_readlane_b32 s52, v22, 21 -; GFX940-NEXT: v_readlane_b32 s51, v22, 20 -; GFX940-NEXT: v_readlane_b32 s50, v22, 19 -; GFX940-NEXT: v_readlane_b32 s49, v22, 18 -; GFX940-NEXT: v_readlane_b32 s48, v22, 17 -; GFX940-NEXT: v_readlane_b32 s47, v22, 16 -; GFX940-NEXT: v_readlane_b32 s46, v22, 15 -; GFX940-NEXT: v_readlane_b32 s45, v22, 14 -; GFX940-NEXT: v_readlane_b32 s44, v22, 13 -; GFX940-NEXT: v_readlane_b32 s43, v22, 12 -; GFX940-NEXT: v_readlane_b32 s42, v22, 11 -; GFX940-NEXT: v_readlane_b32 s41, v22, 10 -; GFX940-NEXT: v_readlane_b32 s40, v22, 9 -; GFX940-NEXT: v_readlane_b32 s39, v22, 8 -; GFX940-NEXT: v_readlane_b32 s38, v22, 7 -; GFX940-NEXT: v_readlane_b32 s37, v22, 6 -; GFX940-NEXT: v_readlane_b32 s36, v22, 5 -; GFX940-NEXT: v_readlane_b32 s35, v22, 4 -; GFX940-NEXT: v_readlane_b32 s34, v22, 3 -; GFX940-NEXT: v_readlane_b32 s33, v22, 2 -; GFX940-NEXT: v_readlane_b32 s31, v22, 1 -; GFX940-NEXT: v_readlane_b32 s30, v22, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: s_add_i32 s2, s32, 0x8040 -; GFX940-NEXT: scratch_load_dword v22, off, s2 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: s_add_i32 s2, s32, 0x8040 +; GFX942-NEXT: scratch_store_dword off, v22, s2 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v22, s30, 0 +; GFX942-NEXT: v_writelane_b32 v22, s31, 1 +; GFX942-NEXT: v_writelane_b32 v22, s33, 2 +; GFX942-NEXT: v_writelane_b32 v22, s34, 3 +; GFX942-NEXT: v_writelane_b32 v22, s35, 4 +; GFX942-NEXT: v_writelane_b32 v22, s36, 5 +; GFX942-NEXT: v_writelane_b32 v22, s37, 6 +; GFX942-NEXT: v_writelane_b32 v22, s38, 7 +; GFX942-NEXT: v_writelane_b32 v22, s39, 8 +; GFX942-NEXT: v_writelane_b32 v22, s40, 9 +; GFX942-NEXT: v_writelane_b32 v22, s41, 10 +; GFX942-NEXT: v_writelane_b32 v22, s42, 11 +; GFX942-NEXT: v_writelane_b32 v22, s43, 12 +; GFX942-NEXT: v_writelane_b32 v22, s44, 13 +; GFX942-NEXT: v_writelane_b32 v22, s45, 14 +; GFX942-NEXT: v_writelane_b32 v22, s46, 15 +; GFX942-NEXT: v_writelane_b32 v22, s47, 16 +; GFX942-NEXT: v_writelane_b32 v22, s48, 17 +; GFX942-NEXT: v_writelane_b32 v22, s49, 18 +; GFX942-NEXT: v_writelane_b32 v22, s50, 19 +; GFX942-NEXT: v_writelane_b32 v22, s51, 20 +; GFX942-NEXT: v_writelane_b32 v22, s52, 21 +; GFX942-NEXT: v_writelane_b32 v22, s53, 22 +; GFX942-NEXT: v_writelane_b32 v22, s54, 23 +; GFX942-NEXT: v_writelane_b32 v22, s55, 24 +; GFX942-NEXT: v_writelane_b32 v22, s56, 25 +; GFX942-NEXT: v_writelane_b32 v22, s57, 26 +; GFX942-NEXT: s_add_i32 s0, s32, 64 +; GFX942-NEXT: v_writelane_b32 v22, s59, 27 +; GFX942-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NEXT: v_writelane_b32 v22, s60, 28 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use alloca0 v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_add_i32 s59, s32, 0x4240 +; GFX942-NEXT: v_writelane_b32 v22, s61, 29 +; GFX942-NEXT: s_and_b64 s[60:61], 0, exec +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s61, v22, 29 +; GFX942-NEXT: v_readlane_b32 s60, v22, 28 +; GFX942-NEXT: v_readlane_b32 s59, v22, 27 +; GFX942-NEXT: v_readlane_b32 s57, v22, 26 +; GFX942-NEXT: v_readlane_b32 s56, v22, 25 +; GFX942-NEXT: v_readlane_b32 s55, v22, 24 +; GFX942-NEXT: v_readlane_b32 s54, v22, 23 +; GFX942-NEXT: v_readlane_b32 s53, v22, 22 +; GFX942-NEXT: v_readlane_b32 s52, v22, 21 +; GFX942-NEXT: v_readlane_b32 s51, v22, 20 +; GFX942-NEXT: v_readlane_b32 s50, v22, 19 +; GFX942-NEXT: v_readlane_b32 s49, v22, 18 +; GFX942-NEXT: v_readlane_b32 s48, v22, 17 +; GFX942-NEXT: v_readlane_b32 s47, v22, 16 +; GFX942-NEXT: v_readlane_b32 s46, v22, 15 +; GFX942-NEXT: v_readlane_b32 s45, v22, 14 +; GFX942-NEXT: v_readlane_b32 s44, v22, 13 +; GFX942-NEXT: v_readlane_b32 s43, v22, 12 +; GFX942-NEXT: v_readlane_b32 s42, v22, 11 +; GFX942-NEXT: v_readlane_b32 s41, v22, 10 +; GFX942-NEXT: v_readlane_b32 s40, v22, 9 +; GFX942-NEXT: v_readlane_b32 s39, v22, 8 +; GFX942-NEXT: v_readlane_b32 s38, v22, 7 +; GFX942-NEXT: v_readlane_b32 s37, v22, 6 +; GFX942-NEXT: v_readlane_b32 s36, v22, 5 +; GFX942-NEXT: v_readlane_b32 s35, v22, 4 +; GFX942-NEXT: v_readlane_b32 s34, v22, 3 +; GFX942-NEXT: v_readlane_b32 s33, v22, 2 +; GFX942-NEXT: v_readlane_b32 s31, v22, 1 +; GFX942-NEXT: v_readlane_b32 s30, v22, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: s_add_i32 s2, s32, 0x8040 +; GFX942-NEXT: scratch_load_dword v22, off, s2 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10_1-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset: ; GFX10_1: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll index e1589ccd7350f..1379eb61e0853 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll @@ -6,8 +6,8 @@ ; RUN: llc -mtriple=amdgcn-amd-amdpal -O0 -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A-NOTTGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX942-NOTTGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX942-TGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s @@ -47,15 +47,15 @@ define amdgpu_kernel void @workgroup_acquire_fence() { ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: workgroup_acquire_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: workgroup_acquire_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: workgroup_acquire_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: workgroup_acquire_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: workgroup_acquire_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -115,14 +115,14 @@ define amdgpu_kernel void @workgroup_release_fence() { ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: workgroup_release_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: workgroup_release_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: workgroup_release_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: workgroup_release_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: workgroup_release_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -184,15 +184,15 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() { ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: workgroup_acq_rel_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: workgroup_acq_rel_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: workgroup_acq_rel_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: workgroup_acq_rel_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: workgroup_acq_rel_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -256,15 +256,15 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() { ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: workgroup_seq_cst_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: workgroup_seq_cst_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: workgroup_seq_cst_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: workgroup_seq_cst_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: workgroup_seq_cst_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -328,15 +328,15 @@ define amdgpu_kernel void @workgroup_one_as_acquire_fence() { ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: workgroup_one_as_acquire_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: workgroup_one_as_acquire_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: workgroup_one_as_acquire_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: workgroup_one_as_acquire_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: workgroup_one_as_acquire_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -396,14 +396,14 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() { ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: workgroup_one_as_release_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: workgroup_one_as_release_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: workgroup_one_as_release_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: workgroup_one_as_release_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: workgroup_one_as_release_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -465,15 +465,15 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() { ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: workgroup_one_as_acq_rel_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: workgroup_one_as_acq_rel_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: workgroup_one_as_acq_rel_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: workgroup_one_as_acq_rel_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: workgroup_one_as_acq_rel_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -537,15 +537,15 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() { ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: workgroup_one_as_seq_cst_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: workgroup_one_as_seq_cst_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: workgroup_one_as_seq_cst_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: workgroup_one_as_seq_cst_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: workgroup_one_as_seq_cst_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -621,17 +621,17 @@ define amdgpu_kernel void @agent_acquire_fence() { ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: agent_acquire_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: agent_acquire_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: agent_acquire_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: agent_acquire_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: agent_acquire_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -705,17 +705,17 @@ define amdgpu_kernel void @agent_release_fence() { ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: agent_release_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: agent_release_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: agent_release_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: agent_release_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: agent_release_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -795,19 +795,19 @@ define amdgpu_kernel void @agent_acq_rel_fence() { ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: agent_acq_rel_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: agent_acq_rel_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: agent_acq_rel_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: agent_acq_rel_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: agent_acq_rel_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -893,19 +893,19 @@ define amdgpu_kernel void @agent_seq_cst_fence() { ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: agent_seq_cst_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: agent_seq_cst_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: agent_seq_cst_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: agent_seq_cst_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: agent_seq_cst_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -991,17 +991,17 @@ define amdgpu_kernel void @agent_one_as_acquire_fence() { ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: agent_one_as_acquire_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: agent_one_as_acquire_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: agent_one_as_acquire_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: agent_one_as_acquire_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: agent_one_as_acquire_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1075,17 +1075,17 @@ define amdgpu_kernel void @agent_one_as_release_fence() { ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: agent_one_as_release_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: agent_one_as_release_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: agent_one_as_release_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: agent_one_as_release_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: agent_one_as_release_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1165,19 +1165,19 @@ define amdgpu_kernel void @agent_one_as_acq_rel_fence() { ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: agent_one_as_acq_rel_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: agent_one_as_acq_rel_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: agent_one_as_acq_rel_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: agent_one_as_acq_rel_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: agent_one_as_acq_rel_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1263,19 +1263,19 @@ define amdgpu_kernel void @agent_one_as_seq_cst_fence() { ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: agent_one_as_seq_cst_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: agent_one_as_seq_cst_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: agent_one_as_seq_cst_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: agent_one_as_seq_cst_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: agent_one_as_seq_cst_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1363,17 +1363,17 @@ define amdgpu_kernel void @system_acquire_fence() { ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: system_acquire_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: system_acquire_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: system_acquire_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: system_acquire_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: system_acquire_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1449,17 +1449,17 @@ define amdgpu_kernel void @system_release_fence() { ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: system_release_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: system_release_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: system_release_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: system_release_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: system_release_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1545,19 +1545,19 @@ define amdgpu_kernel void @system_acq_rel_fence() { ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: system_acq_rel_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: system_acq_rel_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: system_acq_rel_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: system_acq_rel_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: system_acq_rel_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1649,19 +1649,19 @@ define amdgpu_kernel void @system_seq_cst_fence() { ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: system_seq_cst_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: system_seq_cst_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: system_seq_cst_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: system_seq_cst_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: system_seq_cst_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1751,17 +1751,17 @@ define amdgpu_kernel void @system_one_as_acquire_fence() { ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: system_one_as_acquire_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: system_one_as_acquire_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: system_one_as_acquire_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: system_one_as_acquire_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: system_one_as_acquire_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1837,17 +1837,17 @@ define amdgpu_kernel void @system_one_as_release_fence() { ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: system_one_as_release_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: system_one_as_release_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: system_one_as_release_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: system_one_as_release_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: system_one_as_release_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1933,19 +1933,19 @@ define amdgpu_kernel void @system_one_as_acq_rel_fence() { ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: system_one_as_acq_rel_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: system_one_as_acq_rel_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: system_one_as_acq_rel_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: system_one_as_acq_rel_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: system_one_as_acq_rel_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2037,19 +2037,19 @@ define amdgpu_kernel void @system_one_as_seq_cst_fence() { ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: system_one_as_seq_cst_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: system_one_as_seq_cst_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: system_one_as_seq_cst_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: system_one_as_seq_cst_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: system_one_as_seq_cst_fence: ; GFX11-WGP: ; %bb.0: ; %entry diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-local.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-local.ll index f564f8e4e0d67..971015b391ca8 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-local.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-local.ll @@ -6,8 +6,8 @@ ; RUN: llc -mtriple=amdgcn-amd-amdpal -O0 -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A-NOTTGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX942-NOTTGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX942-TGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s @@ -48,14 +48,14 @@ define amdgpu_kernel void @workgroup_acquire_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: workgroup_acquire_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: workgroup_acquire_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: workgroup_acquire_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: workgroup_acquire_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: workgroup_acquire_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -116,14 +116,14 @@ define amdgpu_kernel void @workgroup_release_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: workgroup_release_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: workgroup_release_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: workgroup_release_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: workgroup_release_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: workgroup_release_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -182,14 +182,14 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: workgroup_acq_rel_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: workgroup_acq_rel_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: workgroup_acq_rel_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: workgroup_acq_rel_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: workgroup_acq_rel_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -248,14 +248,14 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: workgroup_seq_cst_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: workgroup_seq_cst_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: workgroup_seq_cst_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: workgroup_seq_cst_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: workgroup_seq_cst_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -308,13 +308,13 @@ define amdgpu_kernel void @workgroup_one_as_acquire_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: workgroup_one_as_acquire_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: workgroup_one_as_acquire_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: workgroup_one_as_acquire_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: workgroup_one_as_acquire_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: workgroup_one_as_acquire_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -365,13 +365,13 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: workgroup_one_as_release_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: workgroup_one_as_release_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: workgroup_one_as_release_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: workgroup_one_as_release_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: workgroup_one_as_release_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -422,13 +422,13 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: workgroup_one_as_acq_rel_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: workgroup_one_as_acq_rel_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: workgroup_one_as_acq_rel_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: workgroup_one_as_acq_rel_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: workgroup_one_as_acq_rel_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -479,13 +479,13 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: workgroup_one_as_seq_cst_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: workgroup_one_as_seq_cst_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: workgroup_one_as_seq_cst_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: workgroup_one_as_seq_cst_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: workgroup_one_as_seq_cst_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -542,14 +542,14 @@ define amdgpu_kernel void @agent_acquire_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: agent_acquire_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: agent_acquire_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: agent_acquire_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: agent_acquire_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: agent_acquire_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -610,14 +610,14 @@ define amdgpu_kernel void @agent_release_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: agent_release_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: agent_release_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: agent_release_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: agent_release_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: agent_release_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -676,14 +676,14 @@ define amdgpu_kernel void @agent_acq_rel_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: agent_acq_rel_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: agent_acq_rel_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: agent_acq_rel_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: agent_acq_rel_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: agent_acq_rel_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -742,14 +742,14 @@ define amdgpu_kernel void @agent_seq_cst_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: agent_seq_cst_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: agent_seq_cst_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: agent_seq_cst_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: agent_seq_cst_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: agent_seq_cst_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -802,13 +802,13 @@ define amdgpu_kernel void @agent_one_as_acquire_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: agent_one_as_acquire_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: agent_one_as_acquire_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: agent_one_as_acquire_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: agent_one_as_acquire_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: agent_one_as_acquire_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -859,13 +859,13 @@ define amdgpu_kernel void @agent_one_as_release_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: agent_one_as_release_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: agent_one_as_release_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: agent_one_as_release_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: agent_one_as_release_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: agent_one_as_release_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -916,13 +916,13 @@ define amdgpu_kernel void @agent_one_as_acq_rel_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: agent_one_as_acq_rel_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: agent_one_as_acq_rel_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: agent_one_as_acq_rel_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: agent_one_as_acq_rel_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: agent_one_as_acq_rel_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -973,13 +973,13 @@ define amdgpu_kernel void @agent_one_as_seq_cst_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: agent_one_as_seq_cst_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: agent_one_as_seq_cst_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: agent_one_as_seq_cst_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: agent_one_as_seq_cst_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: agent_one_as_seq_cst_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1036,14 +1036,14 @@ define amdgpu_kernel void @system_acquire_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: system_acquire_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: system_acquire_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: system_acquire_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: system_acquire_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: system_acquire_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1104,14 +1104,14 @@ define amdgpu_kernel void @system_release_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: system_release_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: system_release_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: system_release_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: system_release_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: system_release_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1170,14 +1170,14 @@ define amdgpu_kernel void @system_acq_rel_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: system_acq_rel_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: system_acq_rel_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: system_acq_rel_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: system_acq_rel_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: system_acq_rel_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1236,14 +1236,14 @@ define amdgpu_kernel void @system_seq_cst_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: system_seq_cst_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: system_seq_cst_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: system_seq_cst_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: system_seq_cst_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: system_seq_cst_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1296,13 +1296,13 @@ define amdgpu_kernel void @system_one_as_acquire_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: system_one_as_acquire_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: system_one_as_acquire_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: system_one_as_acquire_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: system_one_as_acquire_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: system_one_as_acquire_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1353,13 +1353,13 @@ define amdgpu_kernel void @system_one_as_release_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: system_one_as_release_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: system_one_as_release_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: system_one_as_release_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: system_one_as_release_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: system_one_as_release_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1410,13 +1410,13 @@ define amdgpu_kernel void @system_one_as_acq_rel_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: system_one_as_acq_rel_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: system_one_as_acq_rel_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: system_one_as_acq_rel_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: system_one_as_acq_rel_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: system_one_as_acq_rel_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1467,13 +1467,13 @@ define amdgpu_kernel void @system_one_as_seq_cst_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: system_one_as_seq_cst_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: system_one_as_seq_cst_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: system_one_as_seq_cst_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: system_one_as_seq_cst_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: system_one_as_seq_cst_fence: ; GFX11-WGP: ; %bb.0: ; %entry diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll index ebda33d01a438..0e459ed0f1243 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll @@ -6,8 +6,8 @@ ; RUN: llc -mtriple=amdgcn-amd-amdpal -O0 -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A-NOTTGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX942-NOTTGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX942-TGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s @@ -42,13 +42,13 @@ define amdgpu_kernel void @singlethread_acquire_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: singlethread_acquire_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: singlethread_acquire_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: singlethread_acquire_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: singlethread_acquire_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: singlethread_acquire_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -99,13 +99,13 @@ define amdgpu_kernel void @singlethread_release_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: singlethread_release_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: singlethread_release_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: singlethread_release_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: singlethread_release_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: singlethread_release_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -156,13 +156,13 @@ define amdgpu_kernel void @singlethread_acq_rel_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: singlethread_acq_rel_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: singlethread_acq_rel_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: singlethread_acq_rel_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: singlethread_acq_rel_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: singlethread_acq_rel_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -213,13 +213,13 @@ define amdgpu_kernel void @singlethread_seq_cst_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: singlethread_seq_cst_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: singlethread_seq_cst_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: singlethread_seq_cst_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: singlethread_seq_cst_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: singlethread_seq_cst_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -270,13 +270,13 @@ define amdgpu_kernel void @singlethread_one_as_acquire_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: singlethread_one_as_acquire_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: singlethread_one_as_acquire_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: singlethread_one_as_acquire_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: singlethread_one_as_acquire_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: singlethread_one_as_acquire_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -327,13 +327,13 @@ define amdgpu_kernel void @singlethread_one_as_release_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: singlethread_one_as_release_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: singlethread_one_as_release_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: singlethread_one_as_release_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: singlethread_one_as_release_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: singlethread_one_as_release_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -384,13 +384,13 @@ define amdgpu_kernel void @singlethread_one_as_acq_rel_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: singlethread_one_as_acq_rel_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: singlethread_one_as_acq_rel_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: singlethread_one_as_acq_rel_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: singlethread_one_as_acq_rel_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: singlethread_one_as_acq_rel_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -441,13 +441,13 @@ define amdgpu_kernel void @singlethread_one_as_seq_cst_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: singlethread_one_as_seq_cst_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: singlethread_one_as_seq_cst_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: singlethread_one_as_seq_cst_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: singlethread_one_as_seq_cst_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: singlethread_one_as_seq_cst_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -498,13 +498,13 @@ define amdgpu_kernel void @wavefront_acquire_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: wavefront_acquire_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: wavefront_acquire_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: wavefront_acquire_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: wavefront_acquire_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: wavefront_acquire_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -555,13 +555,13 @@ define amdgpu_kernel void @wavefront_release_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: wavefront_release_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: wavefront_release_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: wavefront_release_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: wavefront_release_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: wavefront_release_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -612,13 +612,13 @@ define amdgpu_kernel void @wavefront_acq_rel_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: wavefront_acq_rel_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: wavefront_acq_rel_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: wavefront_acq_rel_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: wavefront_acq_rel_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: wavefront_acq_rel_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -669,13 +669,13 @@ define amdgpu_kernel void @wavefront_seq_cst_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: wavefront_seq_cst_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: wavefront_seq_cst_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: wavefront_seq_cst_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: wavefront_seq_cst_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: wavefront_seq_cst_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -726,13 +726,13 @@ define amdgpu_kernel void @wavefront_one_as_acquire_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: wavefront_one_as_acquire_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: wavefront_one_as_acquire_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: wavefront_one_as_acquire_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: wavefront_one_as_acquire_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: wavefront_one_as_acquire_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -783,13 +783,13 @@ define amdgpu_kernel void @wavefront_one_as_release_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: wavefront_one_as_release_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: wavefront_one_as_release_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: wavefront_one_as_release_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: wavefront_one_as_release_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: wavefront_one_as_release_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -840,13 +840,13 @@ define amdgpu_kernel void @wavefront_one_as_acq_rel_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: wavefront_one_as_acq_rel_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: wavefront_one_as_acq_rel_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: wavefront_one_as_acq_rel_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: wavefront_one_as_acq_rel_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: wavefront_one_as_acq_rel_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -897,13 +897,13 @@ define amdgpu_kernel void @wavefront_one_as_seq_cst_fence() { ; GFX90A-TGSPLIT: ; %bb.0: ; %entry ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: wavefront_one_as_seq_cst_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: wavefront_one_as_seq_cst_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: wavefront_one_as_seq_cst_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: wavefront_one_as_seq_cst_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: wavefront_one_as_seq_cst_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -964,16 +964,16 @@ define amdgpu_kernel void @workgroup_acquire_fence() { ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: workgroup_acquire_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: workgroup_acquire_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: workgroup_acquire_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: workgroup_acquire_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: workgroup_acquire_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1040,15 +1040,15 @@ define amdgpu_kernel void @workgroup_release_fence() { ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: workgroup_release_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: workgroup_release_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: workgroup_release_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: workgroup_release_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: workgroup_release_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1117,16 +1117,16 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() { ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: workgroup_acq_rel_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: workgroup_acq_rel_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: workgroup_acq_rel_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: workgroup_acq_rel_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: workgroup_acq_rel_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1197,16 +1197,16 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() { ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: workgroup_seq_cst_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: workgroup_seq_cst_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: workgroup_seq_cst_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: workgroup_seq_cst_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: workgroup_seq_cst_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1272,15 +1272,15 @@ define amdgpu_kernel void @workgroup_one_as_acquire_fence() { ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: workgroup_one_as_acquire_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: workgroup_one_as_acquire_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: workgroup_one_as_acquire_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: workgroup_one_as_acquire_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: workgroup_one_as_acquire_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1340,14 +1340,14 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() { ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: workgroup_one_as_release_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: workgroup_one_as_release_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: workgroup_one_as_release_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: workgroup_one_as_release_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: workgroup_one_as_release_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1409,15 +1409,15 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() { ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: workgroup_one_as_acq_rel_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: workgroup_one_as_acq_rel_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: workgroup_one_as_acq_rel_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: workgroup_one_as_acq_rel_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: workgroup_one_as_acq_rel_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1481,15 +1481,15 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() { ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: workgroup_one_as_seq_cst_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: workgroup_one_as_seq_cst_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: workgroup_one_as_seq_cst_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: workgroup_one_as_seq_cst_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: workgroup_one_as_seq_cst_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1565,17 +1565,17 @@ define amdgpu_kernel void @agent_acquire_fence() { ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: agent_acquire_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: agent_acquire_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: agent_acquire_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: agent_acquire_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: agent_acquire_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1649,17 +1649,17 @@ define amdgpu_kernel void @agent_release_fence() { ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: agent_release_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: agent_release_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: agent_release_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: agent_release_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: agent_release_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1739,19 +1739,19 @@ define amdgpu_kernel void @agent_acq_rel_fence() { ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: agent_acq_rel_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: agent_acq_rel_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: agent_acq_rel_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: agent_acq_rel_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: agent_acq_rel_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1837,19 +1837,19 @@ define amdgpu_kernel void @agent_seq_cst_fence() { ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: agent_seq_cst_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: agent_seq_cst_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: agent_seq_cst_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: agent_seq_cst_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: agent_seq_cst_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1935,17 +1935,17 @@ define amdgpu_kernel void @agent_one_as_acquire_fence() { ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: agent_one_as_acquire_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: agent_one_as_acquire_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: agent_one_as_acquire_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: agent_one_as_acquire_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: agent_one_as_acquire_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2019,17 +2019,17 @@ define amdgpu_kernel void @agent_one_as_release_fence() { ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: agent_one_as_release_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: agent_one_as_release_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: agent_one_as_release_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: agent_one_as_release_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: agent_one_as_release_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2109,19 +2109,19 @@ define amdgpu_kernel void @agent_one_as_acq_rel_fence() { ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: agent_one_as_acq_rel_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: agent_one_as_acq_rel_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: agent_one_as_acq_rel_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: agent_one_as_acq_rel_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: agent_one_as_acq_rel_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2207,19 +2207,19 @@ define amdgpu_kernel void @agent_one_as_seq_cst_fence() { ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: agent_one_as_seq_cst_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: agent_one_as_seq_cst_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: agent_one_as_seq_cst_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: agent_one_as_seq_cst_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: agent_one_as_seq_cst_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2307,17 +2307,17 @@ define amdgpu_kernel void @system_acquire_fence() { ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: system_acquire_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: system_acquire_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: system_acquire_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: system_acquire_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: system_acquire_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2393,17 +2393,17 @@ define amdgpu_kernel void @system_release_fence() { ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: system_release_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: system_release_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: system_release_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: system_release_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: system_release_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2489,19 +2489,19 @@ define amdgpu_kernel void @system_acq_rel_fence() { ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: system_acq_rel_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: system_acq_rel_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: system_acq_rel_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: system_acq_rel_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: system_acq_rel_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2593,19 +2593,19 @@ define amdgpu_kernel void @system_seq_cst_fence() { ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: system_seq_cst_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: system_seq_cst_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: system_seq_cst_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: system_seq_cst_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: system_seq_cst_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2695,17 +2695,17 @@ define amdgpu_kernel void @system_one_as_acquire_fence() { ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: system_one_as_acquire_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: system_one_as_acquire_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: system_one_as_acquire_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: system_one_as_acquire_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: system_one_as_acquire_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2781,17 +2781,17 @@ define amdgpu_kernel void @system_one_as_release_fence() { ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: system_one_as_release_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: system_one_as_release_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: system_one_as_release_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: system_one_as_release_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: system_one_as_release_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2877,19 +2877,19 @@ define amdgpu_kernel void @system_one_as_acq_rel_fence() { ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: system_one_as_acq_rel_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: system_one_as_acq_rel_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: system_one_as_acq_rel_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: system_one_as_acq_rel_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: system_one_as_acq_rel_fence: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2981,19 +2981,19 @@ define amdgpu_kernel void @system_one_as_seq_cst_fence() { ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: system_one_as_seq_cst_fence: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: system_one_as_seq_cst_fence: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: system_one_as_seq_cst_fence: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: system_one_as_seq_cst_fence: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: system_one_as_seq_cst_fence: ; GFX11-WGP: ; %bb.0: ; %entry diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll index 321f572d57cb2..5af37809443e0 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll @@ -5,8 +5,8 @@ ; RUN: llc -mtriple=amdgcn-amd-amdpal -O0 -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A-NOTTGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX942-NOTTGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX942-TGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s @@ -93,29 +93,29 @@ define amdgpu_kernel void @flat_agent_unordered_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_unordered_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_unordered_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_unordered_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_unordered_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_unordered_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -260,29 +260,29 @@ define amdgpu_kernel void @flat_agent_monotonic_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_monotonic_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_monotonic_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_monotonic_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_monotonic_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_monotonic_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -434,31 +434,31 @@ define amdgpu_kernel void @flat_agent_acquire_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_acquire_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_acquire_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_acquire_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_acquire_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_acquire_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -624,33 +624,33 @@ define amdgpu_kernel void @flat_agent_seq_cst_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_seq_cst_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_seq_cst_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_seq_cst_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_seq_cst_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_seq_cst_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -801,25 +801,25 @@ define amdgpu_kernel void @flat_agent_unordered_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_unordered_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_unordered_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_unordered_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_unordered_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_unordered_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -935,25 +935,25 @@ define amdgpu_kernel void @flat_agent_monotonic_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_monotonic_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_monotonic_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_monotonic_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_monotonic_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_monotonic_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1077,29 +1077,29 @@ define amdgpu_kernel void @flat_agent_release_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_release_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_release_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_release_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_release_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_release_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1235,29 +1235,29 @@ define amdgpu_kernel void @flat_agent_seq_cst_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_seq_cst_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_seq_cst_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_seq_cst_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_seq_cst_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_seq_cst_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1385,25 +1385,25 @@ define amdgpu_kernel void @flat_agent_monotonic_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_monotonic_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_monotonic_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_monotonic_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_monotonic_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_monotonic_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1534,29 +1534,29 @@ define amdgpu_kernel void @flat_agent_acquire_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_acquire_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_acquire_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_acquire_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_acquire_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_acquire_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1692,29 +1692,29 @@ define amdgpu_kernel void @flat_agent_release_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_release_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_release_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_release_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_release_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_release_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1865,33 +1865,33 @@ define amdgpu_kernel void @flat_agent_acq_rel_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_acq_rel_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_acq_rel_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_acq_rel_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_acq_rel_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_acq_rel_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2054,33 +2054,33 @@ define amdgpu_kernel void @flat_agent_seq_cst_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_seq_cst_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_seq_cst_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_seq_cst_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_seq_cst_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_seq_cst_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2249,33 +2249,33 @@ define amdgpu_kernel void @flat_agent_acquire_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_acquire_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_acquire_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_acquire_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_acquire_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_acquire_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2451,37 +2451,37 @@ define amdgpu_kernel void @flat_agent_acq_rel_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_acq_rel_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_acq_rel_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_acq_rel_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_acq_rel_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_acq_rel_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2673,37 +2673,37 @@ define amdgpu_kernel void @flat_agent_seq_cst_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_seq_cst_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_seq_cst_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_seq_cst_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_seq_cst_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_seq_cst_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2922,33 +2922,33 @@ define amdgpu_kernel void @flat_agent_monotonic_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_monotonic_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_monotonic_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_monotonic_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_monotonic_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_monotonic_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3160,37 +3160,37 @@ define amdgpu_kernel void @flat_agent_acquire_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_acquire_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_acquire_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_acquire_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_acquire_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_acquire_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3407,37 +3407,37 @@ define amdgpu_kernel void @flat_agent_release_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_release_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_release_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_release_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_release_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_release_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3669,41 +3669,41 @@ define amdgpu_kernel void @flat_agent_acq_rel_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_acq_rel_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_acq_rel_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_acq_rel_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_acq_rel_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_acq_rel_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3947,41 +3947,41 @@ define amdgpu_kernel void @flat_agent_seq_cst_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_seq_cst_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_seq_cst_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_seq_cst_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_seq_cst_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_seq_cst_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4217,37 +4217,37 @@ define amdgpu_kernel void @flat_agent_monotonic_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_monotonic_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_monotonic_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_monotonic_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_monotonic_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_monotonic_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4471,37 +4471,37 @@ define amdgpu_kernel void @flat_agent_acquire_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_acquire_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_acquire_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_acquire_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_acquire_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_acquire_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4733,41 +4733,41 @@ define amdgpu_kernel void @flat_agent_release_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_release_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_release_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_release_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_release_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_release_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5011,41 +5011,41 @@ define amdgpu_kernel void @flat_agent_acq_rel_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_acq_rel_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_acq_rel_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_acq_rel_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_acq_rel_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_acq_rel_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5289,41 +5289,41 @@ define amdgpu_kernel void @flat_agent_seq_cst_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_seq_cst_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_seq_cst_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_seq_cst_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_seq_cst_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_seq_cst_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5567,41 +5567,41 @@ define amdgpu_kernel void @flat_agent_monotonic_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_monotonic_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_monotonic_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_monotonic_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_monotonic_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_monotonic_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5845,41 +5845,41 @@ define amdgpu_kernel void @flat_agent_acquire_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_acquire_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_acquire_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_acquire_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_acquire_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_acquire_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6123,41 +6123,41 @@ define amdgpu_kernel void @flat_agent_release_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_release_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_release_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_release_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_release_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_release_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6401,41 +6401,41 @@ define amdgpu_kernel void @flat_agent_acq_rel_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_acq_rel_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_acq_rel_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_acq_rel_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_acq_rel_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_acq_rel_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6679,41 +6679,41 @@ define amdgpu_kernel void @flat_agent_seq_cst_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_seq_cst_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_seq_cst_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_seq_cst_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_seq_cst_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_seq_cst_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6956,39 +6956,39 @@ define amdgpu_kernel void @flat_agent_monotonic_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_monotonic_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_monotonic_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_monotonic_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_monotonic_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_monotonic_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7232,41 +7232,41 @@ define amdgpu_kernel void @flat_agent_acquire_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_acquire_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_acquire_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_acquire_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_acquire_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_acquire_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7517,43 +7517,43 @@ define amdgpu_kernel void @flat_agent_release_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_release_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_release_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_release_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_release_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_release_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7817,45 +7817,45 @@ define amdgpu_kernel void @flat_agent_acq_rel_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_acq_rel_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_acq_rel_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_acq_rel_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_acq_rel_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_acq_rel_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8129,45 +8129,45 @@ define amdgpu_kernel void @flat_agent_seq_cst_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_seq_cst_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_seq_cst_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_seq_cst_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_seq_cst_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_seq_cst_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8433,41 +8433,41 @@ define amdgpu_kernel void @flat_agent_monotonic_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_monotonic_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_monotonic_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_monotonic_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_monotonic_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_monotonic_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8721,41 +8721,41 @@ define amdgpu_kernel void @flat_agent_acquire_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_acquire_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_acquire_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_acquire_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_acquire_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_acquire_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9013,45 +9013,45 @@ define amdgpu_kernel void @flat_agent_release_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_release_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_release_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_release_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_release_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_release_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9325,45 +9325,45 @@ define amdgpu_kernel void @flat_agent_acq_rel_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_acq_rel_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_acq_rel_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_acq_rel_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_acq_rel_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_acq_rel_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9637,45 +9637,45 @@ define amdgpu_kernel void @flat_agent_seq_cst_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_seq_cst_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_seq_cst_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_seq_cst_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_seq_cst_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_seq_cst_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9949,45 +9949,45 @@ define amdgpu_kernel void @flat_agent_monotonic_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_monotonic_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_monotonic_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_monotonic_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_monotonic_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_monotonic_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10261,45 +10261,45 @@ define amdgpu_kernel void @flat_agent_acquire_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_acquire_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_acquire_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_acquire_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_acquire_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_acquire_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10569,45 +10569,45 @@ define amdgpu_kernel void @flat_agent_release_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_release_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_release_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_release_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_release_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_release_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10881,45 +10881,45 @@ define amdgpu_kernel void @flat_agent_acq_rel_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_acq_rel_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11193,45 +11193,45 @@ define amdgpu_kernel void @flat_agent_seq_cst_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_seq_cst_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11420,29 +11420,29 @@ define amdgpu_kernel void @flat_agent_one_as_unordered_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_unordered_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_unordered_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_unordered_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_unordered_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_unordered_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11587,29 +11587,29 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_monotonic_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_monotonic_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_monotonic_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_monotonic_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_monotonic_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11766,32 +11766,32 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_acquire_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_acquire_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_acquire_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_acquire_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_acquire_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11966,34 +11966,34 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_seq_cst_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_seq_cst_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_seq_cst_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_seq_cst_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_seq_cst_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12148,25 +12148,25 @@ define amdgpu_kernel void @flat_agent_one_as_unordered_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_unordered_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_unordered_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_unordered_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_unordered_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_unordered_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12282,25 +12282,25 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_monotonic_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_monotonic_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_monotonic_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_monotonic_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_monotonic_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12424,29 +12424,29 @@ define amdgpu_kernel void @flat_agent_one_as_release_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_release_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_release_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_release_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_release_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_release_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12582,29 +12582,29 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_seq_cst_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_seq_cst_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_seq_cst_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_seq_cst_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_seq_cst_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12732,25 +12732,25 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_monotonic_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_monotonic_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_monotonic_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_monotonic_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_monotonic_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12879,29 +12879,29 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_acquire_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_acquire_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_acquire_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_acquire_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_acquire_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13035,29 +13035,29 @@ define amdgpu_kernel void @flat_agent_one_as_release_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_release_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_release_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_release_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_release_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_release_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13206,33 +13206,33 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_acq_rel_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_acq_rel_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_acq_rel_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_acq_rel_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_acq_rel_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13391,33 +13391,33 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_seq_cst_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_seq_cst_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_seq_cst_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_seq_cst_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_seq_cst_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13589,34 +13589,34 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_acquire_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_acquire_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_acquire_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_acquire_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_acquire_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13801,38 +13801,38 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_acq_rel_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_acq_rel_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_acq_rel_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_acq_rel_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_acq_rel_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14033,38 +14033,38 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_seq_cst_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_seq_cst_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_seq_cst_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_seq_cst_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_seq_cst_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14287,33 +14287,33 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_monotonic_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_monotonic_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_monotonic_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_monotonic_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_monotonic_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14523,37 +14523,37 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_acquire_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_acquire_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_acquire_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_acquire_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_acquire_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14768,37 +14768,37 @@ define amdgpu_kernel void @flat_agent_one_as_release_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_release_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_release_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_release_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_release_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_release_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15028,41 +15028,41 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_acq_rel_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_acq_rel_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_acq_rel_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_acq_rel_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_acq_rel_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15302,41 +15302,41 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_seq_cst_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_seq_cst_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_seq_cst_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_seq_cst_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_seq_cst_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15568,37 +15568,37 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_monotonic_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_monotonic_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_monotonic_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_monotonic_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_monotonic_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15818,37 +15818,37 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_acquire_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_acquire_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_acquire_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_acquire_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_acquire_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16076,41 +16076,41 @@ define amdgpu_kernel void @flat_agent_one_as_release_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_release_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_release_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_release_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_release_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_release_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16350,41 +16350,41 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_acq_rel_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_acq_rel_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_acq_rel_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_acq_rel_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_acq_rel_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16624,41 +16624,41 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_seq_cst_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_seq_cst_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_seq_cst_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_seq_cst_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_seq_cst_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16898,41 +16898,41 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_monotonic_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_monotonic_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_monotonic_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_monotonic_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_monotonic_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17172,41 +17172,41 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_acquire_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_acquire_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_acquire_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_acquire_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_acquire_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17446,41 +17446,41 @@ define amdgpu_kernel void @flat_agent_one_as_release_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_release_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_release_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_release_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_release_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_release_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17720,41 +17720,41 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_acq_rel_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_acq_rel_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_acq_rel_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_acq_rel_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_acq_rel_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17994,41 +17994,41 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_seq_cst_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_seq_cst_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_seq_cst_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_seq_cst_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_seq_cst_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -18269,39 +18269,39 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_monotonic_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_monotonic_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_monotonic_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_monotonic_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_monotonic_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -18550,42 +18550,42 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_acquire_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_acquire_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_acquire_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_acquire_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_acquire_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -18840,43 +18840,43 @@ define amdgpu_kernel void @flat_agent_one_as_release_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_release_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_release_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_release_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_release_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_release_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -19145,46 +19145,46 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_acq_rel_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_acq_rel_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_acq_rel_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_acq_rel_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_acq_rel_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -19467,46 +19467,46 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_seq_cst_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_seq_cst_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_seq_cst_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_seq_cst_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_seq_cst_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -19781,42 +19781,42 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_monotonic_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_monotonic_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_monotonic_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_monotonic_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_monotonic_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -20079,42 +20079,42 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_acquire_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_acquire_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_acquire_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_acquire_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_acquire_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -20381,46 +20381,46 @@ define amdgpu_kernel void @flat_agent_one_as_release_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_release_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_release_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_release_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_release_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_release_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -20703,46 +20703,46 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_acq_rel_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_acq_rel_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_acq_rel_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_acq_rel_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_acq_rel_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -21025,46 +21025,46 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_seq_cst_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_seq_cst_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_seq_cst_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_seq_cst_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_seq_cst_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -21347,46 +21347,46 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_monotonic_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_monotonic_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_monotonic_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_monotonic_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_monotonic_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -21669,46 +21669,46 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_acquire_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_acquire_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_acquire_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_acquire_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_acquire_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -21987,46 +21987,46 @@ define amdgpu_kernel void @flat_agent_one_as_release_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_release_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_release_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_release_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_release_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_release_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -22309,46 +22309,46 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -22631,46 +22631,46 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_agent_one_as_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_agent_one_as_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_agent_one_as_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_agent_one_as_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_seq_cst_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll index df5b45dea0c2f..30c0a322d7ddc 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll @@ -5,8 +5,8 @@ ; RUN: llc -mtriple=amdgcn-amd-amdpal -O0 -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A-NOTTGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX942-NOTTGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX942-TGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s @@ -93,29 +93,29 @@ define amdgpu_kernel void @flat_nontemporal_load_0( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_nontemporal_load_0: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] nt -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_nontemporal_load_0: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] nt +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: flat_nontemporal_load_0: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] nt -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: flat_nontemporal_load_0: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] nt +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_nontemporal_load_0: ; GFX11-WGP: ; %bb.0: ; %entry @@ -358,47 +358,47 @@ define amdgpu_kernel void @flat_nontemporal_load_1( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_nontemporal_load_1: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_mov_b32 s4, 0x3ff -; GFX940-NOTTGSPLIT-NEXT: v_and_b32_e64 v0, v0, s4 -; GFX940-NOTTGSPLIT-NEXT: s_mov_b32 s4, 2 -; GFX940-NOTTGSPLIT-NEXT: v_lshlrev_b32_e64 v0, s4, v0 -; GFX940-NOTTGSPLIT-NEXT: s_mov_b32 s4, 0 -; GFX940-NOTTGSPLIT-NEXT: ; implicit-def: $sgpr4 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] nt -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_nontemporal_load_1: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_mov_b32 s4, 0x3ff +; GFX942-NOTTGSPLIT-NEXT: v_and_b32_e64 v0, v0, s4 +; GFX942-NOTTGSPLIT-NEXT: s_mov_b32 s4, 2 +; GFX942-NOTTGSPLIT-NEXT: v_lshlrev_b32_e64 v0, s4, v0 +; GFX942-NOTTGSPLIT-NEXT: s_mov_b32 s4, 0 +; GFX942-NOTTGSPLIT-NEXT: ; implicit-def: $sgpr4 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] nt +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: flat_nontemporal_load_1: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_mov_b32 s4, 0x3ff -; GFX940-TGSPLIT-NEXT: v_and_b32_e64 v0, v0, s4 -; GFX940-TGSPLIT-NEXT: s_mov_b32 s4, 2 -; GFX940-TGSPLIT-NEXT: v_lshlrev_b32_e64 v0, s4, v0 -; GFX940-TGSPLIT-NEXT: s_mov_b32 s4, 0 -; GFX940-TGSPLIT-NEXT: ; implicit-def: $sgpr4 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] nt -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: flat_nontemporal_load_1: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_mov_b32 s4, 0x3ff +; GFX942-TGSPLIT-NEXT: v_and_b32_e64 v0, v0, s4 +; GFX942-TGSPLIT-NEXT: s_mov_b32 s4, 2 +; GFX942-TGSPLIT-NEXT: v_lshlrev_b32_e64 v0, s4, v0 +; GFX942-TGSPLIT-NEXT: s_mov_b32 s4, 0 +; GFX942-TGSPLIT-NEXT: ; implicit-def: $sgpr4 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] nt +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_nontemporal_load_1: ; GFX11-WGP: ; %bb.0: ; %entry @@ -615,29 +615,29 @@ define amdgpu_kernel void @flat_nontemporal_store_0( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 glc slc ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_nontemporal_store_0: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 nt sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_nontemporal_store_0: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 nt +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: flat_nontemporal_store_0: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 nt sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: flat_nontemporal_store_0: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 nt +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_nontemporal_store_0: ; GFX11-WGP: ; %bb.0: ; %entry @@ -870,47 +870,47 @@ define amdgpu_kernel void @flat_nontemporal_store_1( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 glc slc ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_nontemporal_store_1: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_mov_b32 s2, 0x3ff -; GFX940-NOTTGSPLIT-NEXT: v_and_b32_e64 v0, v0, s2 -; GFX940-NOTTGSPLIT-NEXT: s_mov_b32 s2, 2 -; GFX940-NOTTGSPLIT-NEXT: v_lshlrev_b32_e64 v0, s2, v0 -; GFX940-NOTTGSPLIT-NEXT: s_mov_b32 s2, 0 -; GFX940-NOTTGSPLIT-NEXT: ; implicit-def: $sgpr2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NOTTGSPLIT-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 nt sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_nontemporal_store_1: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[2:3] +; GFX942-NOTTGSPLIT-NEXT: s_mov_b32 s2, 0x3ff +; GFX942-NOTTGSPLIT-NEXT: v_and_b32_e64 v0, v0, s2 +; GFX942-NOTTGSPLIT-NEXT: s_mov_b32 s2, 2 +; GFX942-NOTTGSPLIT-NEXT: v_lshlrev_b32_e64 v0, s2, v0 +; GFX942-NOTTGSPLIT-NEXT: s_mov_b32 s2, 0 +; GFX942-NOTTGSPLIT-NEXT: ; implicit-def: $sgpr2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NOTTGSPLIT-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 nt +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: flat_nontemporal_store_1: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[2:3] -; GFX940-TGSPLIT-NEXT: s_mov_b32 s2, 0x3ff -; GFX940-TGSPLIT-NEXT: v_and_b32_e64 v0, v0, s2 -; GFX940-TGSPLIT-NEXT: s_mov_b32 s2, 2 -; GFX940-TGSPLIT-NEXT: v_lshlrev_b32_e64 v0, s2, v0 -; GFX940-TGSPLIT-NEXT: s_mov_b32 s2, 0 -; GFX940-TGSPLIT-NEXT: ; implicit-def: $sgpr2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-TGSPLIT-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 nt sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: flat_nontemporal_store_1: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[2:3] +; GFX942-TGSPLIT-NEXT: s_mov_b32 s2, 0x3ff +; GFX942-TGSPLIT-NEXT: v_and_b32_e64 v0, v0, s2 +; GFX942-TGSPLIT-NEXT: s_mov_b32 s2, 2 +; GFX942-TGSPLIT-NEXT: v_lshlrev_b32_e64 v0, s2, v0 +; GFX942-TGSPLIT-NEXT: s_mov_b32 s2, 0 +; GFX942-TGSPLIT-NEXT: ; implicit-def: $sgpr2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-TGSPLIT-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 nt +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_nontemporal_store_1: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1130,30 +1130,30 @@ define amdgpu_kernel void @flat_nontemporal_volatile_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_nontemporal_volatile_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_nontemporal_volatile_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: flat_nontemporal_volatile_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: flat_nontemporal_volatile_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_nontemporal_volatile_load: ; GFX11-WGP: ; %bb.0: ; %entry diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll index 4fa15c194adf6..b80dfaea01653 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll @@ -5,8 +5,8 @@ ; RUN: llc -mtriple=amdgcn-amd-amdpal -O0 -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A-NOTTGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX942-NOTTGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX942-TGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s @@ -93,29 +93,29 @@ define amdgpu_kernel void @flat_singlethread_unordered_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_unordered_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_unordered_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_unordered_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_unordered_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_unordered_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -260,29 +260,29 @@ define amdgpu_kernel void @flat_singlethread_monotonic_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_monotonic_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_monotonic_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_monotonic_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_monotonic_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_monotonic_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -427,29 +427,29 @@ define amdgpu_kernel void @flat_singlethread_acquire_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_acquire_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_acquire_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_acquire_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_acquire_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_acquire_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -594,29 +594,29 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_seq_cst_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_seq_cst_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_seq_cst_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_seq_cst_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_seq_cst_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -745,25 +745,25 @@ define amdgpu_kernel void @flat_singlethread_unordered_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_unordered_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_unordered_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_unordered_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_unordered_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_unordered_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -879,25 +879,25 @@ define amdgpu_kernel void @flat_singlethread_monotonic_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_monotonic_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_monotonic_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_monotonic_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_monotonic_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_monotonic_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1013,25 +1013,25 @@ define amdgpu_kernel void @flat_singlethread_release_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_release_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_release_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_release_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_release_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_release_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1147,25 +1147,25 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_seq_cst_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_seq_cst_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_seq_cst_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_seq_cst_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_seq_cst_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1281,25 +1281,25 @@ define amdgpu_kernel void @flat_singlethread_monotonic_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_monotonic_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_monotonic_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_monotonic_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_monotonic_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_monotonic_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1415,25 +1415,25 @@ define amdgpu_kernel void @flat_singlethread_acquire_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_acquire_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_acquire_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_acquire_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_acquire_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_acquire_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1549,25 +1549,25 @@ define amdgpu_kernel void @flat_singlethread_release_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_release_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_release_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_release_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_release_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_release_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1683,25 +1683,25 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_acq_rel_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_acq_rel_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_acq_rel_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_acq_rel_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_acq_rel_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1817,25 +1817,25 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_seq_cst_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_seq_cst_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_seq_cst_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_seq_cst_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_seq_cst_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1973,31 +1973,31 @@ define amdgpu_kernel void @flat_singlethread_acquire_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_acquire_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_acquire_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_acquire_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_acquire_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_acquire_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2152,31 +2152,31 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_acq_rel_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_acq_rel_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_acq_rel_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_acq_rel_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_acq_rel_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2331,31 +2331,31 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_seq_cst_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_seq_cst_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_seq_cst_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_seq_cst_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_seq_cst_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2552,33 +2552,33 @@ define amdgpu_kernel void @flat_singlethread_monotonic_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_monotonic_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_monotonic_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_monotonic_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_monotonic_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_monotonic_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2775,33 +2775,33 @@ define amdgpu_kernel void @flat_singlethread_acquire_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_acquire_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_acquire_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_acquire_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_acquire_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_acquire_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2998,33 +2998,33 @@ define amdgpu_kernel void @flat_singlethread_release_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_release_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_release_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_release_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_release_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_release_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3221,33 +3221,33 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_acq_rel_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_acq_rel_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_acq_rel_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_acq_rel_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_acq_rel_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3444,33 +3444,33 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_seq_cst_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_seq_cst_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_seq_cst_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_seq_cst_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_seq_cst_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3667,33 +3667,33 @@ define amdgpu_kernel void @flat_singlethread_monotonic_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_monotonic_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_monotonic_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_monotonic_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_monotonic_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_monotonic_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3890,33 +3890,33 @@ define amdgpu_kernel void @flat_singlethread_acquire_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_acquire_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_acquire_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_acquire_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_acquire_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_acquire_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4113,33 +4113,33 @@ define amdgpu_kernel void @flat_singlethread_release_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_release_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_release_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_release_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_release_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_release_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4336,33 +4336,33 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_acq_rel_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_acq_rel_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_acq_rel_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_acq_rel_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_acq_rel_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4559,33 +4559,33 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_seq_cst_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_seq_cst_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_seq_cst_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_seq_cst_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_seq_cst_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4782,33 +4782,33 @@ define amdgpu_kernel void @flat_singlethread_monotonic_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_monotonic_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_monotonic_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_monotonic_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_monotonic_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_monotonic_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5005,33 +5005,33 @@ define amdgpu_kernel void @flat_singlethread_acquire_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_acquire_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_acquire_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_acquire_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_acquire_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_acquire_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5228,33 +5228,33 @@ define amdgpu_kernel void @flat_singlethread_release_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_release_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_release_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_release_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_release_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_release_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5451,33 +5451,33 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_acq_rel_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_acq_rel_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_acq_rel_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_acq_rel_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_acq_rel_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5674,33 +5674,33 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_seq_cst_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_seq_cst_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_seq_cst_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_seq_cst_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_seq_cst_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5919,39 +5919,39 @@ define amdgpu_kernel void @flat_singlethread_monotonic_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_monotonic_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_monotonic_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_monotonic_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_monotonic_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_monotonic_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6188,39 +6188,39 @@ define amdgpu_kernel void @flat_singlethread_acquire_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_acquire_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_acquire_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_acquire_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_acquire_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_acquire_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6457,39 +6457,39 @@ define amdgpu_kernel void @flat_singlethread_release_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_release_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_release_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_release_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_release_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_release_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6726,39 +6726,39 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_acq_rel_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_acq_rel_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_acq_rel_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_acq_rel_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_acq_rel_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6995,39 +6995,39 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_seq_cst_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_seq_cst_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_seq_cst_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_seq_cst_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_seq_cst_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7264,39 +7264,39 @@ define amdgpu_kernel void @flat_singlethread_monotonic_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_monotonic_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_monotonic_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_monotonic_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_monotonic_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_monotonic_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7533,39 +7533,39 @@ define amdgpu_kernel void @flat_singlethread_acquire_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_acquire_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_acquire_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_acquire_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_acquire_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_acquire_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7802,39 +7802,39 @@ define amdgpu_kernel void @flat_singlethread_release_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_release_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_release_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_release_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_release_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_release_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8071,39 +8071,39 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_acq_rel_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_acq_rel_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_acq_rel_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_acq_rel_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_acq_rel_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8340,39 +8340,39 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_seq_cst_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_seq_cst_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_seq_cst_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_seq_cst_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_seq_cst_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8609,39 +8609,39 @@ define amdgpu_kernel void @flat_singlethread_monotonic_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_monotonic_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_monotonic_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_monotonic_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_monotonic_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_monotonic_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8878,39 +8878,39 @@ define amdgpu_kernel void @flat_singlethread_acquire_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_acquire_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_acquire_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_acquire_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_acquire_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_acquire_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9147,39 +9147,39 @@ define amdgpu_kernel void @flat_singlethread_release_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_release_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_release_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_release_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_release_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_release_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9416,39 +9416,39 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_acq_rel_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9685,39 +9685,39 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_seq_cst_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9884,29 +9884,29 @@ define amdgpu_kernel void @flat_singlethread_one_as_unordered_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_unordered_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_unordered_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_unordered_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_unordered_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_unordered_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10051,29 +10051,29 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_monotonic_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_monotonic_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_monotonic_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_monotonic_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_monotonic_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10218,29 +10218,29 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_acquire_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_acquire_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_acquire_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_acquire_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_acquire_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10385,29 +10385,29 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_seq_cst_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10536,25 +10536,25 @@ define amdgpu_kernel void @flat_singlethread_one_as_unordered_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_unordered_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_unordered_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_unordered_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_unordered_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_unordered_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10670,25 +10670,25 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_monotonic_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_monotonic_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_monotonic_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_monotonic_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_monotonic_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10804,25 +10804,25 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_release_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_release_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_release_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_release_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_release_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10938,25 +10938,25 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_seq_cst_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11072,25 +11072,25 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_monotonic_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_monotonic_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_monotonic_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_monotonic_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_monotonic_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11206,25 +11206,25 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_acquire_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_acquire_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_acquire_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_acquire_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_acquire_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11340,25 +11340,25 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_release_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_release_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_release_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_release_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_release_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11474,25 +11474,25 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_acq_rel_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_acq_rel_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_acq_rel_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_acq_rel_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_acq_rel_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11608,25 +11608,25 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_seq_cst_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11764,31 +11764,31 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_acquire_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_acquire_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_acquire_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_acquire_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_acquire_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11943,31 +11943,31 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_acq_rel_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_acq_rel_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_acq_rel_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_acq_rel_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_acq_rel_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12122,31 +12122,31 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_seq_cst_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12343,33 +12343,33 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_monotonic_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_monotonic_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_monotonic_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_monotonic_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_monotonic_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12566,33 +12566,33 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_acquire_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_acquire_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_acquire_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_acquire_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_acquire_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12789,33 +12789,33 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_release_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_release_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_release_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_release_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_release_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13012,33 +13012,33 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_acq_rel_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_acq_rel_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_acq_rel_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_acq_rel_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_acq_rel_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13235,33 +13235,33 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_seq_cst_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13458,33 +13458,33 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_monotonic_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_monotonic_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_monotonic_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_monotonic_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_monotonic_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13681,33 +13681,33 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_acquire_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_acquire_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_acquire_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_acquire_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_acquire_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13904,33 +13904,33 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_release_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_release_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_release_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_release_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_release_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14127,33 +14127,33 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_acq_rel_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_acq_rel_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_acq_rel_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_acq_rel_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_acq_rel_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14350,33 +14350,33 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_seq_cst_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14573,33 +14573,33 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_monotonic_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_monotonic_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_monotonic_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_monotonic_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_monotonic_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14796,33 +14796,33 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_acquire_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_acquire_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_acquire_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_acquire_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_acquire_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15019,33 +15019,33 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_release_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_release_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_release_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_release_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_release_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15242,33 +15242,33 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_acq_rel_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_acq_rel_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_acq_rel_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_acq_rel_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_acq_rel_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15465,33 +15465,33 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_seq_cst_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15710,39 +15710,39 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_monotonic_ret_cmpx ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_monotonic_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_monotonic_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_monotonic_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_monotonic_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_monotonic_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15979,39 +15979,39 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_monotonic_ret_cmpxch ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_acquire_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_acquire_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_acquire_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_acquire_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_acquire_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16248,39 +16248,39 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_monotonic_ret_cmpxch ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_release_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_release_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_release_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_release_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_release_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16517,39 +16517,39 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_monotonic_ret_cmpxch ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16786,39 +16786,39 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_monotonic_ret_cmpxch ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17055,39 +17055,39 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_acquire_ret_cmpxch ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_monotonic_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_monotonic_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_monotonic_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_monotonic_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_monotonic_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17324,39 +17324,39 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_acquire_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_acquire_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_acquire_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_acquire_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_acquire_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17593,39 +17593,39 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_release_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_release_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_release_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_release_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_release_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17862,39 +17862,39 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_acq_rel_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_acq_rel_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_acq_rel_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_acq_rel_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_acq_rel_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -18131,39 +18131,39 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_seq_cst_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -18400,39 +18400,39 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_seq_cst_ret_cmpxch ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_monotonic_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_monotonic_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_monotonic_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_monotonic_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_monotonic_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -18669,39 +18669,39 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_acquire_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_acquire_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_acquire_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_acquire_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_acquire_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -18938,39 +18938,39 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_release_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_release_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_release_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_release_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_release_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -19207,39 +19207,39 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -19476,39 +19476,39 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll index 6f9773f7cfd2e..1ec942ea5f47b 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll @@ -5,8 +5,8 @@ ; RUN: llc -mtriple=amdgcn-amd-amdpal -O0 -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A-NOTTGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX942-NOTTGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX942-TGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s @@ -93,29 +93,29 @@ define amdgpu_kernel void @flat_system_unordered_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_unordered_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_unordered_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_unordered_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_unordered_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_unordered_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -260,29 +260,29 @@ define amdgpu_kernel void @flat_system_monotonic_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_monotonic_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_monotonic_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_monotonic_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_monotonic_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_monotonic_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -436,31 +436,31 @@ define amdgpu_kernel void @flat_system_acquire_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_acquire_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_acquire_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_acquire_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_acquire_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_acquire_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -628,33 +628,33 @@ define amdgpu_kernel void @flat_system_seq_cst_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_seq_cst_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_seq_cst_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_seq_cst_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_seq_cst_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_seq_cst_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -805,25 +805,25 @@ define amdgpu_kernel void @flat_system_unordered_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_unordered_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_unordered_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_unordered_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_unordered_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_unordered_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -939,25 +939,25 @@ define amdgpu_kernel void @flat_system_monotonic_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_monotonic_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_monotonic_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_monotonic_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_monotonic_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_monotonic_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1083,29 +1083,29 @@ define amdgpu_kernel void @flat_system_release_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_release_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_release_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_release_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_release_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_release_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1245,29 +1245,29 @@ define amdgpu_kernel void @flat_system_seq_cst_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_seq_cst_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_seq_cst_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_seq_cst_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_seq_cst_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_seq_cst_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1397,25 +1397,25 @@ define amdgpu_kernel void @flat_system_monotonic_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_monotonic_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_monotonic_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_monotonic_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_monotonic_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_monotonic_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1548,29 +1548,29 @@ define amdgpu_kernel void @flat_system_acquire_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_acquire_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_acquire_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_acquire_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_acquire_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_acquire_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1708,29 +1708,29 @@ define amdgpu_kernel void @flat_system_release_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_release_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_release_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_release_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_release_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_release_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1887,33 +1887,33 @@ define amdgpu_kernel void @flat_system_acq_rel_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_acq_rel_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_acq_rel_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_acq_rel_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_acq_rel_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_acq_rel_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2082,33 +2082,33 @@ define amdgpu_kernel void @flat_system_seq_cst_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_seq_cst_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_seq_cst_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_seq_cst_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_seq_cst_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_seq_cst_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2281,33 +2281,33 @@ define amdgpu_kernel void @flat_system_acquire_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_acquire_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_acquire_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_acquire_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_acquire_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_acquire_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2487,37 +2487,37 @@ define amdgpu_kernel void @flat_system_acq_rel_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_acq_rel_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_acq_rel_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_acq_rel_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_acq_rel_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_acq_rel_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2715,37 +2715,37 @@ define amdgpu_kernel void @flat_system_seq_cst_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_seq_cst_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_seq_cst_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_seq_cst_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_seq_cst_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_seq_cst_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2966,33 +2966,33 @@ define amdgpu_kernel void @flat_system_monotonic_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_monotonic_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_monotonic_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_monotonic_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_monotonic_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_monotonic_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3206,37 +3206,37 @@ define amdgpu_kernel void @flat_system_acquire_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_acquire_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_acquire_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_acquire_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_acquire_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_acquire_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3455,37 +3455,37 @@ define amdgpu_kernel void @flat_system_release_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_release_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_release_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_release_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_release_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_release_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3723,41 +3723,41 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_acq_rel_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_acq_rel_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_acq_rel_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_acq_rel_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_acq_rel_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4007,41 +4007,41 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_seq_cst_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_seq_cst_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_seq_cst_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_seq_cst_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_seq_cst_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4281,37 +4281,37 @@ define amdgpu_kernel void @flat_system_monotonic_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_monotonic_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_monotonic_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_monotonic_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_monotonic_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_monotonic_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4537,37 +4537,37 @@ define amdgpu_kernel void @flat_system_acquire_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_acquire_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_acquire_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_acquire_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_acquire_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_acquire_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4803,41 +4803,41 @@ define amdgpu_kernel void @flat_system_release_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_release_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_release_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_release_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_release_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_release_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5087,41 +5087,41 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_acq_rel_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_acq_rel_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_acq_rel_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_acq_rel_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_acq_rel_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5371,41 +5371,41 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_seq_cst_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_seq_cst_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_seq_cst_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_seq_cst_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_seq_cst_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5655,41 +5655,41 @@ define amdgpu_kernel void @flat_system_monotonic_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_monotonic_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_monotonic_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_monotonic_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_monotonic_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_monotonic_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5939,41 +5939,41 @@ define amdgpu_kernel void @flat_system_acquire_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_acquire_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_acquire_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_acquire_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_acquire_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_acquire_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6223,41 +6223,41 @@ define amdgpu_kernel void @flat_system_release_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_release_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_release_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_release_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_release_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_release_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6507,41 +6507,41 @@ define amdgpu_kernel void @flat_system_acq_rel_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_acq_rel_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_acq_rel_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_acq_rel_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_acq_rel_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_acq_rel_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6791,41 +6791,41 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_seq_cst_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_seq_cst_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_seq_cst_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_seq_cst_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_seq_cst_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7070,39 +7070,39 @@ define amdgpu_kernel void @flat_system_monotonic_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_monotonic_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_monotonic_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_monotonic_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_monotonic_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_monotonic_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7348,41 +7348,41 @@ define amdgpu_kernel void @flat_system_acquire_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_acquire_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_acquire_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_acquire_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_acquire_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_acquire_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7635,43 +7635,43 @@ define amdgpu_kernel void @flat_system_release_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_release_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_release_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_release_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_release_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_release_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7941,45 +7941,45 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_acq_rel_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_acq_rel_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_acq_rel_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_acq_rel_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_acq_rel_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8259,45 +8259,45 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_seq_cst_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_seq_cst_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_seq_cst_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_seq_cst_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_seq_cst_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8567,41 +8567,41 @@ define amdgpu_kernel void @flat_system_monotonic_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_monotonic_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_monotonic_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_monotonic_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_monotonic_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_monotonic_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8857,41 +8857,41 @@ define amdgpu_kernel void @flat_system_acquire_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_acquire_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_acquire_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_acquire_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_acquire_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_acquire_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9153,45 +9153,45 @@ define amdgpu_kernel void @flat_system_release_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_release_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_release_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_release_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_release_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_release_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9471,45 +9471,45 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_acq_rel_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_acq_rel_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_acq_rel_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_acq_rel_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_acq_rel_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9789,45 +9789,45 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_seq_cst_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_seq_cst_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_seq_cst_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_seq_cst_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_seq_cst_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10107,45 +10107,45 @@ define amdgpu_kernel void @flat_system_monotonic_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_monotonic_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_monotonic_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_monotonic_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_monotonic_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_monotonic_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10425,45 +10425,45 @@ define amdgpu_kernel void @flat_system_acquire_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_acquire_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_acquire_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_acquire_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_acquire_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_acquire_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10739,45 +10739,45 @@ define amdgpu_kernel void @flat_system_release_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_release_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_release_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_release_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_release_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_release_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11057,45 +11057,45 @@ define amdgpu_kernel void @flat_system_acq_rel_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_acq_rel_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11375,45 +11375,45 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_seq_cst_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11604,29 +11604,29 @@ define amdgpu_kernel void @flat_system_one_as_unordered_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_unordered_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_unordered_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_unordered_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_unordered_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_unordered_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11771,29 +11771,29 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_monotonic_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_monotonic_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_monotonic_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_monotonic_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_monotonic_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11952,32 +11952,32 @@ define amdgpu_kernel void @flat_system_one_as_acquire_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_acquire_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_acquire_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_acquire_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_acquire_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_acquire_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12154,34 +12154,34 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_seq_cst_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_seq_cst_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_seq_cst_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12336,25 +12336,25 @@ define amdgpu_kernel void @flat_system_one_as_unordered_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_unordered_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_unordered_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_unordered_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_unordered_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_unordered_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12470,25 +12470,25 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_monotonic_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_monotonic_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_monotonic_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_monotonic_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_monotonic_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12614,29 +12614,29 @@ define amdgpu_kernel void @flat_system_one_as_release_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_release_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_release_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_release_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_release_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_release_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12776,29 +12776,29 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_seq_cst_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_seq_cst_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_seq_cst_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12928,25 +12928,25 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_monotonic_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_monotonic_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_monotonic_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_monotonic_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_monotonic_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13077,29 +13077,29 @@ define amdgpu_kernel void @flat_system_one_as_acquire_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_acquire_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_acquire_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_acquire_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_acquire_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_acquire_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13235,29 +13235,29 @@ define amdgpu_kernel void @flat_system_one_as_release_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_release_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_release_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_release_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_release_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_release_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13412,33 +13412,33 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_acq_rel_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_acq_rel_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_acq_rel_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_acq_rel_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_acq_rel_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13603,33 +13603,33 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_seq_cst_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_seq_cst_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_seq_cst_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13805,34 +13805,34 @@ define amdgpu_kernel void @flat_system_one_as_acquire_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_acquire_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_acquire_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_acquire_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_acquire_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_acquire_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14021,38 +14021,38 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_acq_rel_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_acq_rel_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_acq_rel_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_acq_rel_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_acq_rel_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14259,38 +14259,38 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_seq_cst_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_seq_cst_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_seq_cst_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14515,33 +14515,33 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_monotonic_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_monotonic_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_monotonic_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_monotonic_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_monotonic_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14753,37 +14753,37 @@ define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_acquire_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_acquire_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_acquire_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_acquire_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_acquire_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15000,37 +15000,37 @@ define amdgpu_kernel void @flat_system_one_as_release_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_release_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_release_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_release_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_release_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_release_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15266,41 +15266,41 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_acq_rel_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_acq_rel_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_acq_rel_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_acq_rel_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_acq_rel_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15546,41 +15546,41 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_seq_cst_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_seq_cst_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_seq_cst_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15816,37 +15816,37 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_monotonic_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_monotonic_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_monotonic_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_monotonic_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_monotonic_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16068,37 +16068,37 @@ define amdgpu_kernel void @flat_system_one_as_acquire_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_acquire_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_acquire_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_acquire_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_acquire_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_acquire_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16330,41 +16330,41 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_release_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_release_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_release_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_release_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_release_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16610,41 +16610,41 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_acq_rel_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_acq_rel_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_acq_rel_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_acq_rel_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_acq_rel_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16890,41 +16890,41 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_seq_cst_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_seq_cst_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_seq_cst_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17170,41 +17170,41 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_monotonic_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_monotonic_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_monotonic_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_monotonic_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_monotonic_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17450,41 +17450,41 @@ define amdgpu_kernel void @flat_system_one_as_acquire_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_acquire_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_acquire_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_acquire_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_acquire_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_acquire_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17730,41 +17730,41 @@ define amdgpu_kernel void @flat_system_one_as_release_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_release_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_release_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_release_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_release_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_release_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -18010,41 +18010,41 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_acq_rel_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_acq_rel_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_acq_rel_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_acq_rel_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_acq_rel_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -18290,41 +18290,41 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_seq_cst_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_seq_cst_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_seq_cst_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -18567,39 +18567,39 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_monotonic_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_monotonic_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_monotonic_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_monotonic_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_monotonic_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -18850,42 +18850,42 @@ define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_acquire_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_acquire_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_acquire_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_acquire_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_acquire_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -19142,43 +19142,43 @@ define amdgpu_kernel void @flat_system_one_as_release_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_release_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_release_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_release_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_release_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_release_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -19453,46 +19453,46 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_acq_rel_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_acq_rel_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_acq_rel_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_acq_rel_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_acq_rel_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -19781,46 +19781,46 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_seq_cst_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_seq_cst_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_seq_cst_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -20099,42 +20099,42 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_monotonic_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_monotonic_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_monotonic_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_monotonic_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_monotonic_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -20399,42 +20399,42 @@ define amdgpu_kernel void @flat_system_one_as_acquire_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_acquire_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_acquire_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_acquire_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_acquire_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_acquire_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -20705,46 +20705,46 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_release_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_release_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_release_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_release_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_release_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -21033,46 +21033,46 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_acq_rel_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_acq_rel_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_acq_rel_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_acq_rel_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_acq_rel_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -21361,46 +21361,46 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_seq_cst_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_seq_cst_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_seq_cst_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -21689,46 +21689,46 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_monotonic_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_monotonic_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_monotonic_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_monotonic_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_monotonic_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -22017,46 +22017,46 @@ define amdgpu_kernel void @flat_system_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_acquire_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_acquire_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_acquire_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_acquire_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_acquire_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -22341,46 +22341,46 @@ define amdgpu_kernel void @flat_system_one_as_release_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_release_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_release_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_release_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_release_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_release_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -22669,46 +22669,46 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -22997,46 +22997,46 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll index 0d837e42c6155..588f06f1be054 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll @@ -5,8 +5,8 @@ ; RUN: llc -mtriple=amdgcn-amd-amdpal -O0 -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A-NOTTGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX942-NOTTGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX942-TGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s @@ -93,29 +93,29 @@ define amdgpu_kernel void @flat_wavefront_unordered_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_unordered_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_unordered_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_unordered_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_unordered_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_unordered_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -260,29 +260,29 @@ define amdgpu_kernel void @flat_wavefront_monotonic_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_monotonic_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_monotonic_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_monotonic_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_monotonic_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_monotonic_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -427,29 +427,29 @@ define amdgpu_kernel void @flat_wavefront_acquire_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_acquire_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_acquire_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_acquire_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_acquire_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_acquire_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -594,29 +594,29 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_seq_cst_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_seq_cst_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_seq_cst_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_seq_cst_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_seq_cst_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -745,25 +745,25 @@ define amdgpu_kernel void @flat_wavefront_unordered_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_unordered_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_unordered_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_unordered_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_unordered_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_unordered_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -879,25 +879,25 @@ define amdgpu_kernel void @flat_wavefront_monotonic_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_monotonic_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_monotonic_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_monotonic_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_monotonic_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_monotonic_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1013,25 +1013,25 @@ define amdgpu_kernel void @flat_wavefront_release_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_release_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_release_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_release_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_release_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_release_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1147,25 +1147,25 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_seq_cst_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_seq_cst_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_seq_cst_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_seq_cst_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_seq_cst_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1281,25 +1281,25 @@ define amdgpu_kernel void @flat_wavefront_monotonic_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_monotonic_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_monotonic_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_monotonic_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_monotonic_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_monotonic_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1415,25 +1415,25 @@ define amdgpu_kernel void @flat_wavefront_acquire_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_acquire_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_acquire_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_acquire_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_acquire_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_acquire_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1549,25 +1549,25 @@ define amdgpu_kernel void @flat_wavefront_release_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_release_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_release_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_release_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_release_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_release_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1683,25 +1683,25 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_acq_rel_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_acq_rel_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_acq_rel_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_acq_rel_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_acq_rel_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1817,25 +1817,25 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_seq_cst_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_seq_cst_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_seq_cst_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_seq_cst_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_seq_cst_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1973,31 +1973,31 @@ define amdgpu_kernel void @flat_wavefront_acquire_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_acquire_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_acquire_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_acquire_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_acquire_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_acquire_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2152,31 +2152,31 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_acq_rel_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_acq_rel_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_acq_rel_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_acq_rel_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_acq_rel_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2331,31 +2331,31 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_seq_cst_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_seq_cst_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_seq_cst_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_seq_cst_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_seq_cst_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2552,33 +2552,33 @@ define amdgpu_kernel void @flat_wavefront_monotonic_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_monotonic_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_monotonic_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_monotonic_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_monotonic_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_monotonic_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2775,33 +2775,33 @@ define amdgpu_kernel void @flat_wavefront_acquire_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_acquire_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_acquire_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_acquire_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_acquire_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_acquire_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2998,33 +2998,33 @@ define amdgpu_kernel void @flat_wavefront_release_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_release_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_release_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_release_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_release_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_release_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3221,33 +3221,33 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_acq_rel_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_acq_rel_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_acq_rel_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_acq_rel_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_acq_rel_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3444,33 +3444,33 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_seq_cst_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_seq_cst_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_seq_cst_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_seq_cst_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_seq_cst_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3667,33 +3667,33 @@ define amdgpu_kernel void @flat_wavefront_monotonic_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_monotonic_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_monotonic_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_monotonic_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_monotonic_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_monotonic_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3890,33 +3890,33 @@ define amdgpu_kernel void @flat_wavefront_acquire_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_acquire_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_acquire_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_acquire_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_acquire_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_acquire_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4113,33 +4113,33 @@ define amdgpu_kernel void @flat_wavefront_release_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_release_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_release_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_release_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_release_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_release_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4336,33 +4336,33 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_acq_rel_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_acq_rel_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_acq_rel_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_acq_rel_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_acq_rel_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4559,33 +4559,33 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_seq_cst_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_seq_cst_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_seq_cst_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_seq_cst_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_seq_cst_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4782,33 +4782,33 @@ define amdgpu_kernel void @flat_wavefront_monotonic_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_monotonic_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_monotonic_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_monotonic_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_monotonic_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_monotonic_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5005,33 +5005,33 @@ define amdgpu_kernel void @flat_wavefront_acquire_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_acquire_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_acquire_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_acquire_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_acquire_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_acquire_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5228,33 +5228,33 @@ define amdgpu_kernel void @flat_wavefront_release_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_release_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_release_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_release_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_release_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_release_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5451,33 +5451,33 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_acq_rel_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_acq_rel_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_acq_rel_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_acq_rel_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_acq_rel_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5674,33 +5674,33 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_seq_cst_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_seq_cst_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_seq_cst_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_seq_cst_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_seq_cst_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5919,39 +5919,39 @@ define amdgpu_kernel void @flat_wavefront_monotonic_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_monotonic_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_monotonic_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_monotonic_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_monotonic_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_monotonic_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6188,39 +6188,39 @@ define amdgpu_kernel void @flat_wavefront_acquire_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_acquire_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_acquire_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_acquire_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_acquire_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_acquire_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6457,39 +6457,39 @@ define amdgpu_kernel void @flat_wavefront_release_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_release_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_release_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_release_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_release_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_release_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6726,39 +6726,39 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_acq_rel_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_acq_rel_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_acq_rel_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_acq_rel_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_acq_rel_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6995,39 +6995,39 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_seq_cst_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_seq_cst_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_seq_cst_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_seq_cst_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_seq_cst_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7264,39 +7264,39 @@ define amdgpu_kernel void @flat_wavefront_monotonic_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_monotonic_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_monotonic_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_monotonic_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_monotonic_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_monotonic_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7533,39 +7533,39 @@ define amdgpu_kernel void @flat_wavefront_acquire_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_acquire_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_acquire_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_acquire_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_acquire_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_acquire_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7802,39 +7802,39 @@ define amdgpu_kernel void @flat_wavefront_release_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_release_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_release_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_release_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_release_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_release_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8071,39 +8071,39 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_acq_rel_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_acq_rel_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_acq_rel_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_acq_rel_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_acq_rel_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8340,39 +8340,39 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_seq_cst_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_seq_cst_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_seq_cst_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_seq_cst_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_seq_cst_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8609,39 +8609,39 @@ define amdgpu_kernel void @flat_wavefront_monotonic_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_monotonic_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_monotonic_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_monotonic_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_monotonic_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_monotonic_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8878,39 +8878,39 @@ define amdgpu_kernel void @flat_wavefront_acquire_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_acquire_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_acquire_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_acquire_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_acquire_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_acquire_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9147,39 +9147,39 @@ define amdgpu_kernel void @flat_wavefront_release_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_release_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_release_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_release_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_release_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_release_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9416,39 +9416,39 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_acq_rel_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9685,39 +9685,39 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_seq_cst_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9884,29 +9884,29 @@ define amdgpu_kernel void @flat_wavefront_one_as_unordered_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_unordered_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_unordered_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_unordered_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_unordered_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_unordered_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10051,29 +10051,29 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_monotonic_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_monotonic_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_monotonic_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_monotonic_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_monotonic_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10218,29 +10218,29 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_acquire_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_acquire_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_acquire_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_acquire_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_acquire_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10385,29 +10385,29 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_seq_cst_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10536,25 +10536,25 @@ define amdgpu_kernel void @flat_wavefront_one_as_unordered_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_unordered_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_unordered_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_unordered_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_unordered_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_unordered_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10670,25 +10670,25 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_monotonic_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_monotonic_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_monotonic_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_monotonic_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_monotonic_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10804,25 +10804,25 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_release_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_release_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_release_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_release_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_release_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10938,25 +10938,25 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_seq_cst_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11072,25 +11072,25 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_monotonic_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_monotonic_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_monotonic_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_monotonic_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_monotonic_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11206,25 +11206,25 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_acquire_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_acquire_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_acquire_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_acquire_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_acquire_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11340,25 +11340,25 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_release_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_release_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_release_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_release_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_release_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11474,25 +11474,25 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_acq_rel_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_acq_rel_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_acq_rel_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_acq_rel_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_acq_rel_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11608,25 +11608,25 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_seq_cst_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11764,31 +11764,31 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_acquire_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_acquire_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_acquire_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_acquire_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_acquire_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11943,31 +11943,31 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_acq_rel_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_acq_rel_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_acq_rel_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_acq_rel_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_acq_rel_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12122,31 +12122,31 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_seq_cst_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12343,33 +12343,33 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_monotonic_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_monotonic_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_monotonic_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_monotonic_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_monotonic_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12566,33 +12566,33 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_acquire_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_acquire_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_acquire_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_acquire_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_acquire_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12789,33 +12789,33 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_release_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_release_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_release_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_release_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_release_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13012,33 +13012,33 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_acq_rel_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_acq_rel_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_acq_rel_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_acq_rel_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_acq_rel_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13235,33 +13235,33 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_seq_cst_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13458,33 +13458,33 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_monotonic_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_monotonic_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_monotonic_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_monotonic_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_monotonic_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13681,33 +13681,33 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_acquire_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_acquire_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_acquire_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_acquire_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_acquire_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13904,33 +13904,33 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_release_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_release_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_release_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_release_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_release_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14127,33 +14127,33 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_acq_rel_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_acq_rel_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_acq_rel_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_acq_rel_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_acq_rel_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14350,33 +14350,33 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_seq_cst_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14573,33 +14573,33 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_monotonic_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_monotonic_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_monotonic_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_monotonic_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_monotonic_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14796,33 +14796,33 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_acquire_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_acquire_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_acquire_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_acquire_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_acquire_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15019,33 +15019,33 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_release_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_release_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_release_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_release_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_release_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15242,33 +15242,33 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_acq_rel_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_acq_rel_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_acq_rel_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_acq_rel_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_acq_rel_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15465,33 +15465,33 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_seq_cst_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15710,39 +15710,39 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_monotonic_ret_cmpxchg ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_monotonic_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_monotonic_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_monotonic_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_monotonic_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_monotonic_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15979,39 +15979,39 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_acquire_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_acquire_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_acquire_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_acquire_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_acquire_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16248,39 +16248,39 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16517,39 +16517,39 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16786,39 +16786,39 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_monotonic_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_monotonic_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_monotonic_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_monotonic_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_monotonic_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17055,39 +17055,39 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_acquire_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_acquire_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_acquire_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_acquire_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_acquire_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17324,39 +17324,39 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_release_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_release_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_release_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_release_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_release_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17593,39 +17593,39 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_acq_rel_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_acq_rel_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_acq_rel_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_acq_rel_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_acq_rel_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17862,39 +17862,39 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_seq_cst_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -18131,39 +18131,39 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -18400,39 +18400,39 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_acquire_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_acquire_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_acquire_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_acquire_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_acquire_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -18669,39 +18669,39 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_release_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_release_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_release_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_release_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_release_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -18938,39 +18938,39 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_relc_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_acq_relc_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_acq_relc_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_acq_relc_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_acq_relc_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_acq_relc_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -19207,39 +19207,39 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll index 71dcfa060c83c..ee7d79a8a8cbb 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll @@ -5,8 +5,8 @@ ; RUN: llc -mtriple=amdgcn-amd-amdpal -O0 -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A-NOTTGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX942-NOTTGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX942-TGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s @@ -93,29 +93,29 @@ define amdgpu_kernel void @flat_workgroup_unordered_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_unordered_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_unordered_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_unordered_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_unordered_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_unordered_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -260,29 +260,29 @@ define amdgpu_kernel void @flat_workgroup_monotonic_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_monotonic_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_monotonic_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_monotonic_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_monotonic_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_monotonic_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -433,31 +433,31 @@ define amdgpu_kernel void @flat_workgroup_acquire_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_acquire_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_acquire_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_acquire_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_acquire_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_acquire_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -619,33 +619,33 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_seq_cst_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_seq_cst_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_seq_cst_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_seq_cst_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_seq_cst_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -788,25 +788,25 @@ define amdgpu_kernel void @flat_workgroup_unordered_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_unordered_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_unordered_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_unordered_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_unordered_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_unordered_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -922,25 +922,25 @@ define amdgpu_kernel void @flat_workgroup_monotonic_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_monotonic_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_monotonic_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_monotonic_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_monotonic_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_monotonic_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1063,27 +1063,27 @@ define amdgpu_kernel void @flat_workgroup_release_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_release_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_release_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_release_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_release_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_release_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1214,27 +1214,27 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_seq_cst_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_seq_cst_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_seq_cst_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_seq_cst_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_seq_cst_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1358,25 +1358,25 @@ define amdgpu_kernel void @flat_workgroup_monotonic_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_monotonic_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_monotonic_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_monotonic_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_monotonic_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_monotonic_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1501,28 +1501,28 @@ define amdgpu_kernel void @flat_workgroup_acquire_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_acquire_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_acquire_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_acquire_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_acquire_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_acquire_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1652,27 +1652,27 @@ define amdgpu_kernel void @flat_workgroup_release_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_release_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_release_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_release_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_release_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_release_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1812,30 +1812,30 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_acq_rel_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_acq_rel_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_acq_rel_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_acq_rel_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_acq_rel_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1982,30 +1982,30 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_seq_cst_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_seq_cst_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_seq_cst_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_seq_cst_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_seq_cst_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2164,33 +2164,33 @@ define amdgpu_kernel void @flat_workgroup_acquire_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_acquire_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_acquire_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_acquire_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_acquire_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_acquire_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2362,35 +2362,35 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_acq_rel_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_acq_rel_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_acq_rel_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_acq_rel_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_acq_rel_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2572,35 +2572,35 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_seq_cst_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_seq_cst_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_seq_cst_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_seq_cst_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_seq_cst_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2811,33 +2811,33 @@ define amdgpu_kernel void @flat_workgroup_monotonic_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_monotonic_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_monotonic_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_monotonic_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_monotonic_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_monotonic_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3043,36 +3043,36 @@ define amdgpu_kernel void @flat_workgroup_acquire_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_acquire_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_acquire_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_acquire_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_acquire_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_acquire_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3283,35 +3283,35 @@ define amdgpu_kernel void @flat_workgroup_release_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_release_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_release_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_release_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_release_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_release_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3532,38 +3532,38 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_acq_rel_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_acq_rel_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_acq_rel_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_acq_rel_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_acq_rel_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3791,38 +3791,38 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_seq_cst_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_seq_cst_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_seq_cst_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_seq_cst_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_seq_cst_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4043,36 +4043,36 @@ define amdgpu_kernel void @flat_workgroup_monotonic_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_monotonic_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_monotonic_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_monotonic_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_monotonic_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_monotonic_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4285,36 +4285,36 @@ define amdgpu_kernel void @flat_workgroup_acquire_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_acquire_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_acquire_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_acquire_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_acquire_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_acquire_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4534,38 +4534,38 @@ define amdgpu_kernel void @flat_workgroup_release_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_release_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_release_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_release_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_release_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_release_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4793,38 +4793,38 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_acq_rel_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_acq_rel_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_acq_rel_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_acq_rel_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_acq_rel_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5052,38 +5052,38 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_seq_cst_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_seq_cst_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_seq_cst_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_seq_cst_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_seq_cst_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5311,38 +5311,38 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_seq_cst_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_seq_cst_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_seq_cst_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_seq_cst_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_seq_cst_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5576,39 +5576,39 @@ define amdgpu_kernel void @flat_workgroup_monotonic_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_monotonic_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_monotonic_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_monotonic_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_monotonic_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_monotonic_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5851,41 +5851,41 @@ define amdgpu_kernel void @flat_workgroup_acquire_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_acquire_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_acquire_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_acquire_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_acquire_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_acquire_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6133,41 +6133,41 @@ define amdgpu_kernel void @flat_workgroup_release_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_release_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_release_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_release_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_release_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_release_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6425,43 +6425,43 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_acq_rel_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_acq_rel_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_acq_rel_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_acq_rel_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_acq_rel_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6725,43 +6725,43 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_seq_cst_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_seq_cst_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_seq_cst_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_seq_cst_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_seq_cst_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7018,41 +7018,41 @@ define amdgpu_kernel void @flat_workgroup_monotonic_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_monotonic_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_monotonic_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_monotonic_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_monotonic_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_monotonic_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7301,41 +7301,41 @@ define amdgpu_kernel void @flat_workgroup_acquire_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_acquire_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_acquire_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_acquire_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_acquire_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_acquire_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7589,43 +7589,43 @@ define amdgpu_kernel void @flat_workgroup_release_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_release_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_release_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_release_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_release_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_release_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7889,43 +7889,43 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_acq_rel_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_acq_rel_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_acq_rel_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_acq_rel_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_acq_rel_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8189,43 +8189,43 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_seq_cst_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_seq_cst_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_seq_cst_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_seq_cst_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_seq_cst_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8489,43 +8489,43 @@ define amdgpu_kernel void @flat_workgroup_monotonic_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_monotonic_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_monotonic_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_monotonic_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_monotonic_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_monotonic_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8789,43 +8789,43 @@ define amdgpu_kernel void @flat_workgroup_acquire_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_acquire_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_acquire_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_acquire_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_acquire_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_acquire_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9087,43 +9087,43 @@ define amdgpu_kernel void @flat_workgroup_release_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_release_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_release_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_release_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_release_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_release_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9387,43 +9387,43 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_acq_rel_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9687,43 +9687,43 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_seq_cst_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9904,29 +9904,29 @@ define amdgpu_kernel void @flat_workgroup_one_as_unordered_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_unordered_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_unordered_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_unordered_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_unordered_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_unordered_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10071,29 +10071,29 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_monotonic_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_monotonic_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_monotonic_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_monotonic_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_monotonic_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10241,30 +10241,30 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_acquire_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_acquire_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_acquire_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_acquire_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_acquire_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10419,31 +10419,31 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_load( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_load_dword v2, v[0:1] sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_seq_cst_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10584,25 +10584,25 @@ define amdgpu_kernel void @flat_workgroup_one_as_unordered_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_unordered_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_unordered_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_unordered_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_unordered_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_unordered_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10718,25 +10718,25 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_monotonic_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_monotonic_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_monotonic_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_monotonic_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_monotonic_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10855,26 +10855,26 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_release_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_release_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_release_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_release_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_release_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10999,26 +10999,26 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_store( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_seq_cst_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11140,25 +11140,25 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_monotonic_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_monotonic_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_monotonic_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_monotonic_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_monotonic_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11278,27 +11278,27 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_acquire_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_acquire_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_acquire_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_acquire_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_acquire_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11421,26 +11421,26 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_release_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_release_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_release_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_release_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_release_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11569,28 +11569,28 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_acq_rel_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_acq_rel_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_acq_rel_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_acq_rel_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_acq_rel_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11723,28 +11723,28 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_seq_cst_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11895,32 +11895,32 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_acquire_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_acquire_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_acquire_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_acquire_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_acquire_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12085,33 +12085,33 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_acq_rel_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_acq_rel_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_acq_rel_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_acq_rel_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_acq_rel_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12284,33 +12284,33 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_seq_cst_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12519,33 +12519,33 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_monotonic_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_monotonic_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_monotonic_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_monotonic_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_monotonic_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12746,35 +12746,35 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_acquire_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_acquire_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_acquire_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_acquire_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_acquire_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12978,34 +12978,34 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_release_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_release_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_release_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_release_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_release_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13215,36 +13215,36 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_acq_rel_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_acq_rel_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_acq_rel_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_acq_rel_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_acq_rel_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13458,36 +13458,36 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_seq_cst_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13698,35 +13698,35 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_monotonic_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_monotonic_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_monotonic_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_monotonic_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_monotonic_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13931,35 +13931,35 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_acquire_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_acquire_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_acquire_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_acquire_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_acquire_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14167,36 +14167,36 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_release_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_release_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_release_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_release_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_release_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14410,36 +14410,36 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_acq_rel_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_acq_rel_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_acq_rel_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_acq_rel_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_acq_rel_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14653,36 +14653,36 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_seq_cst_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14896,36 +14896,36 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_monotonic_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_monotonic_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_monotonic_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_monotonic_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_monotonic_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15139,36 +15139,36 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_acquire_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_acquire_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_acquire_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_acquire_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_acquire_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15382,36 +15382,36 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_release_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_release_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_release_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_release_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_release_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15625,36 +15625,36 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_acq_rel_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_acq_rel_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_acq_rel_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_acq_rel_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_acq_rel_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15868,36 +15868,36 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_seq_cst_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16126,39 +16126,39 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonicmonotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_monotonicmonotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_monotonicmonotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_monotonicmonotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_monotonicmonotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_monotonicmonotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16398,40 +16398,40 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_acquire_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_acquire_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_acquire_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_acquire_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_acquire_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16675,40 +16675,40 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_release_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_release_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_release_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_release_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_release_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16957,41 +16957,41 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17246,41 +17246,41 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17532,40 +17532,40 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_monotonic_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_monotonic_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_monotonic_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_monotonic_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_monotonic_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17811,40 +17811,40 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_acquire_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_acquire_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_acquire_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_acquire_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_acquire_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -18091,41 +18091,41 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_release_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_release_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_release_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_release_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_release_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -18380,41 +18380,41 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_acq_rel_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_acq_rel_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_acq_rel_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_acq_rel_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_acq_rel_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -18669,41 +18669,41 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_seq_cst_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -18958,41 +18958,41 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -19247,41 +19247,41 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_acquire_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_acquire_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_acquire_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_acquire_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_acquire_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -19534,41 +19534,41 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_release_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_release_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_release_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_release_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_release_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -19823,41 +19823,41 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -20112,41 +20112,41 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: flat_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll index 586b8ec05f30c..b9487f8e14c2b 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll @@ -6,8 +6,8 @@ ; RUN: llc -mtriple=amdgcn-amd-amdpal -O0 -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A-NOTTGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX942-NOTTGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX942-TGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s @@ -122,27 +122,27 @@ define amdgpu_kernel void @global_agent_unordered_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_unordered_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_unordered_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_unordered_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_unordered_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_unordered_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -303,27 +303,27 @@ define amdgpu_kernel void @global_agent_monotonic_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_monotonic_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_monotonic_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_monotonic_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_monotonic_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_monotonic_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -492,29 +492,29 @@ define amdgpu_kernel void @global_agent_acquire_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_acquire_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_acquire_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_acquire_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_acquire_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_acquire_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -694,29 +694,29 @@ define amdgpu_kernel void @global_agent_seq_cst_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_seq_cst_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_seq_cst_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_seq_cst_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_seq_cst_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_seq_cst_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -876,25 +876,25 @@ define amdgpu_kernel void @global_agent_unordered_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_unordered_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_unordered_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_unordered_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_unordered_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_unordered_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1029,25 +1029,25 @@ define amdgpu_kernel void @global_agent_monotonic_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_monotonic_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_monotonic_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_monotonic_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_monotonic_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_monotonic_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1191,29 +1191,29 @@ define amdgpu_kernel void @global_agent_release_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_release_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_release_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_release_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_release_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_release_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1369,29 +1369,29 @@ define amdgpu_kernel void @global_agent_seq_cst_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_seq_cst_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_seq_cst_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_seq_cst_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_seq_cst_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_seq_cst_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1536,25 +1536,25 @@ define amdgpu_kernel void @global_agent_monotonic_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_monotonic_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_monotonic_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_monotonic_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_monotonic_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_monotonic_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1702,29 +1702,29 @@ define amdgpu_kernel void @global_agent_acquire_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_acquire_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_acquire_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_acquire_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_acquire_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_acquire_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1876,29 +1876,29 @@ define amdgpu_kernel void @global_agent_release_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_release_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_release_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_release_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_release_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_release_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2067,33 +2067,33 @@ define amdgpu_kernel void @global_agent_acq_rel_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_acq_rel_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_acq_rel_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_acq_rel_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_acq_rel_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_acq_rel_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2272,33 +2272,33 @@ define amdgpu_kernel void @global_agent_seq_cst_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_seq_cst_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_seq_cst_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_seq_cst_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_seq_cst_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_seq_cst_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2477,31 +2477,31 @@ define amdgpu_kernel void @global_agent_acquire_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_acquire_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_acquire_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_acquire_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_acquire_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_acquire_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2682,35 +2682,35 @@ define amdgpu_kernel void @global_agent_acq_rel_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_acq_rel_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_acq_rel_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_acq_rel_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_acq_rel_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_acq_rel_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2907,35 +2907,35 @@ define amdgpu_kernel void @global_agent_seq_cst_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_seq_cst_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_seq_cst_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_seq_cst_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_seq_cst_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_seq_cst_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3139,33 +3139,33 @@ define amdgpu_kernel void @global_agent_monotonic_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_monotonic_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_monotonic_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_monotonic_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_monotonic_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_monotonic_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3370,37 +3370,37 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_acquire_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_acquire_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_acquire_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_acquire_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_acquire_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3609,37 +3609,37 @@ define amdgpu_kernel void @global_agent_release_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_release_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_release_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_release_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_release_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_release_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3865,41 +3865,41 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_acq_rel_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_acq_rel_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_acq_rel_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_acq_rel_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_acq_rel_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4135,41 +4135,41 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_seq_cst_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_seq_cst_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_seq_cst_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_seq_cst_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_seq_cst_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4396,37 +4396,37 @@ define amdgpu_kernel void @global_agent_monotonic_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_monotonic_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_monotonic_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_monotonic_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_monotonic_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_monotonic_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4641,37 +4641,37 @@ define amdgpu_kernel void @global_agent_acquire_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_acquire_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_acquire_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_acquire_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_acquire_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_acquire_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4895,41 +4895,41 @@ define amdgpu_kernel void @global_agent_release_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_release_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_release_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_release_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_release_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_release_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5165,41 +5165,41 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_acq_rel_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_acq_rel_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_acq_rel_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_acq_rel_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_acq_rel_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5435,41 +5435,41 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_seq_cst_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_seq_cst_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_seq_cst_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_seq_cst_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_seq_cst_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5705,41 +5705,41 @@ define amdgpu_kernel void @global_agent_monotonic_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_monotonic_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_monotonic_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_monotonic_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_monotonic_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_monotonic_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5975,41 +5975,41 @@ define amdgpu_kernel void @global_agent_acquire_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_acquire_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_acquire_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_acquire_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_acquire_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_acquire_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6245,41 +6245,41 @@ define amdgpu_kernel void @global_agent_release_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_release_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_release_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_release_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_release_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_release_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6515,41 +6515,41 @@ define amdgpu_kernel void @global_agent_acq_rel_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_acq_rel_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_acq_rel_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_acq_rel_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_acq_rel_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_acq_rel_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6785,41 +6785,41 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_seq_cst_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_seq_cst_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_seq_cst_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_seq_cst_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_seq_cst_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7049,37 +7049,37 @@ define amdgpu_kernel void @global_agent_monotonic_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7306,39 +7306,39 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_acquire_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_acquire_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_acquire_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_acquire_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_acquire_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7571,41 +7571,41 @@ define amdgpu_kernel void @global_agent_release_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_release_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_release_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_release_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_release_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_release_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7853,43 +7853,43 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8147,43 +8147,43 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8432,39 +8432,39 @@ define amdgpu_kernel void @global_agent_monotonic_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_monotonic_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_monotonic_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_monotonic_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_monotonic_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_monotonic_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8701,39 +8701,39 @@ define amdgpu_kernel void @global_agent_acquire_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_acquire_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_acquire_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_acquire_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_acquire_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_acquire_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8975,43 +8975,43 @@ define amdgpu_kernel void @global_agent_release_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_release_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_release_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_release_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_release_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_release_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9269,43 +9269,43 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9563,43 +9563,43 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9857,43 +9857,43 @@ define amdgpu_kernel void @global_agent_monotonic_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_monotonic_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_monotonic_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_monotonic_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_monotonic_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_monotonic_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10151,43 +10151,43 @@ define amdgpu_kernel void @global_agent_acquire_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10441,43 +10441,43 @@ define amdgpu_kernel void @global_agent_release_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_release_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_release_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_release_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_release_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_release_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10735,43 +10735,43 @@ define amdgpu_kernel void @global_agent_acq_rel_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_acq_rel_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11029,43 +11029,43 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11270,27 +11270,27 @@ define amdgpu_kernel void @global_agent_one_as_unordered_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_unordered_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_unordered_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_unordered_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_unordered_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_unordered_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11451,27 +11451,27 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_monotonic_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_monotonic_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_monotonic_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11640,29 +11640,29 @@ define amdgpu_kernel void @global_agent_one_as_acquire_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_acquire_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_acquire_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_acquire_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11842,29 +11842,29 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_seq_cst_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_seq_cst_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_seq_cst_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12024,25 +12024,25 @@ define amdgpu_kernel void @global_agent_one_as_unordered_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_unordered_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_unordered_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_unordered_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_unordered_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_unordered_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12177,25 +12177,25 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_monotonic_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_monotonic_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_monotonic_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12339,29 +12339,29 @@ define amdgpu_kernel void @global_agent_one_as_release_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_release_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_release_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_release_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_release_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_release_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12517,29 +12517,29 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_seq_cst_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_seq_cst_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_seq_cst_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12684,25 +12684,25 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_monotonic_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_monotonic_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_monotonic_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12850,29 +12850,29 @@ define amdgpu_kernel void @global_agent_one_as_acquire_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_acquire_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_acquire_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_acquire_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13024,29 +13024,29 @@ define amdgpu_kernel void @global_agent_one_as_release_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_release_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_release_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_release_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_release_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_release_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13215,33 +13215,33 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_acq_rel_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_acq_rel_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_acq_rel_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13420,33 +13420,33 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_seq_cst_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_seq_cst_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_seq_cst_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13625,31 +13625,31 @@ define amdgpu_kernel void @global_agent_one_as_acquire_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_acquire_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_acquire_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_acquire_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13830,35 +13830,35 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14055,35 +14055,35 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14287,33 +14287,33 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14518,37 +14518,37 @@ define amdgpu_kernel void @global_agent_one_as_acquire_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14757,37 +14757,37 @@ define amdgpu_kernel void @global_agent_one_as_release_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_release_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_release_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_release_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_release_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_release_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15013,41 +15013,41 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15283,41 +15283,41 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15544,37 +15544,37 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15789,37 +15789,37 @@ define amdgpu_kernel void @global_agent_one_as_acquire_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_acquire_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_acquire_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_acquire_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16043,41 +16043,41 @@ define amdgpu_kernel void @global_agent_one_as_release_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_release_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_release_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_release_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_release_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_release_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16313,41 +16313,41 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16583,41 +16583,41 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16853,41 +16853,41 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17123,41 +17123,41 @@ define amdgpu_kernel void @global_agent_one_as_acquire_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17393,41 +17393,41 @@ define amdgpu_kernel void @global_agent_one_as_release_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_release_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_release_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_release_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_release_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_release_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17663,41 +17663,41 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17933,41 +17933,41 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -18197,37 +18197,37 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -18454,39 +18454,39 @@ define amdgpu_kernel void @global_agent_one_as_acquire_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -18728,43 +18728,43 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -19022,43 +19022,43 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -19307,39 +19307,39 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -19576,39 +19576,39 @@ define amdgpu_kernel void @global_agent_one_as_acquire_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -19850,43 +19850,43 @@ define amdgpu_kernel void @global_agent_one_as_release_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -20144,43 +20144,43 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -20438,43 +20438,43 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -20732,43 +20732,43 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_monotonic_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_monotonic_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_monotonic_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -21026,43 +21026,43 @@ define amdgpu_kernel void @global_agent_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -21316,43 +21316,43 @@ define amdgpu_kernel void @global_agent_one_as_release_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -21610,43 +21610,43 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -21904,43 +21904,43 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll index ebcc900307c46..a6bd1b678f95e 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll @@ -6,8 +6,8 @@ ; RUN: llc -mtriple=amdgcn-amd-amdpal -O0 -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A-NOTTGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX942-NOTTGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX942-TGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s @@ -115,29 +115,29 @@ define amdgpu_kernel void @global_nontemporal_load_0( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_nontemporal_load_0: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[2:3], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_nontemporal_load_0: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[2:3], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: global_nontemporal_load_0: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[2:3], 0x0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: global_nontemporal_load_0: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[2:3], 0x0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_nontemporal_load_0: ; GFX11-WGP: ; %bb.0: ; %entry @@ -345,37 +345,37 @@ define amdgpu_kernel void @global_nontemporal_load_1( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_nontemporal_load_1: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_mov_b32 s4, 0x3ff -; GFX940-NOTTGSPLIT-NEXT: v_and_b32_e64 v1, v1, s4 -; GFX940-NOTTGSPLIT-NEXT: s_mov_b32 s4, 2 -; GFX940-NOTTGSPLIT-NEXT: v_lshlrev_b32_e64 v1, s4, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v1, s[2:3] nt -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_nontemporal_load_1: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_mov_b32 s4, 0x3ff +; GFX942-NOTTGSPLIT-NEXT: v_and_b32_e64 v1, v1, s4 +; GFX942-NOTTGSPLIT-NEXT: s_mov_b32 s4, 2 +; GFX942-NOTTGSPLIT-NEXT: v_lshlrev_b32_e64 v1, s4, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v1, s[2:3] nt +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: global_nontemporal_load_1: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_mov_b32 s4, 0x3ff -; GFX940-TGSPLIT-NEXT: v_and_b32_e64 v1, v1, s4 -; GFX940-TGSPLIT-NEXT: s_mov_b32 s4, 2 -; GFX940-TGSPLIT-NEXT: v_lshlrev_b32_e64 v1, s4, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v1, s[2:3] nt -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: global_nontemporal_load_1: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_mov_b32 s4, 0x3ff +; GFX942-TGSPLIT-NEXT: v_and_b32_e64 v1, v1, s4 +; GFX942-TGSPLIT-NEXT: s_mov_b32 s4, 2 +; GFX942-TGSPLIT-NEXT: v_lshlrev_b32_e64 v1, s4, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v1, s[2:3] nt +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_nontemporal_load_1: ; GFX11-WGP: ; %bb.0: ; %entry @@ -553,29 +553,29 @@ define amdgpu_kernel void @global_nontemporal_store_0( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] glc slc ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_nontemporal_store_0: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[2:3], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 nt sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_nontemporal_store_0: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[2:3], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] nt +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: global_nontemporal_store_0: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[2:3], 0x0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 nt sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: global_nontemporal_store_0: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[2:3], 0x0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] nt +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_nontemporal_store_0: ; GFX11-WGP: ; %bb.0: ; %entry @@ -763,35 +763,35 @@ define amdgpu_kernel void @global_nontemporal_store_1( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] glc slc ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_nontemporal_store_1: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[2:3], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_mov_b32 s3, 0x3ff -; GFX940-NOTTGSPLIT-NEXT: v_and_b32_e64 v0, v0, s3 -; GFX940-NOTTGSPLIT-NEXT: s_mov_b32 s3, 2 -; GFX940-NOTTGSPLIT-NEXT: v_lshlrev_b32_e64 v0, s3, v0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 nt sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_nontemporal_store_1: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[2:3], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_mov_b32 s3, 0x3ff +; GFX942-NOTTGSPLIT-NEXT: v_and_b32_e64 v0, v0, s3 +; GFX942-NOTTGSPLIT-NEXT: s_mov_b32 s3, 2 +; GFX942-NOTTGSPLIT-NEXT: v_lshlrev_b32_e64 v0, s3, v0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] nt +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: global_nontemporal_store_1: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[2:3], 0x0 -; GFX940-TGSPLIT-NEXT: s_mov_b32 s3, 0x3ff -; GFX940-TGSPLIT-NEXT: v_and_b32_e64 v0, v0, s3 -; GFX940-TGSPLIT-NEXT: s_mov_b32 s3, 2 -; GFX940-TGSPLIT-NEXT: v_lshlrev_b32_e64 v0, s3, v0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 nt sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: global_nontemporal_store_1: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[2:3], 0x0 +; GFX942-TGSPLIT-NEXT: s_mov_b32 s3, 0x3ff +; GFX942-TGSPLIT-NEXT: v_and_b32_e64 v0, v0, s3 +; GFX942-TGSPLIT-NEXT: s_mov_b32 s3, 2 +; GFX942-TGSPLIT-NEXT: v_lshlrev_b32_e64 v0, s3, v0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] nt +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_nontemporal_store_1: ; GFX11-WGP: ; %bb.0: ; %entry @@ -972,27 +972,27 @@ define amdgpu_kernel void @global_nontemporal_volatile_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_nontemporal_volatile_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_nontemporal_volatile_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: global_nontemporal_volatile_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: global_nontemporal_volatile_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_nontemporal_volatile_load: ; GFX11-WGP: ; %bb.0: ; %entry diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll index ad08dc1777f64..a5de6a92db1af 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll @@ -6,8 +6,8 @@ ; RUN: llc -mtriple=amdgcn-amd-amdpal -O0 -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A-NOTTGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX942-NOTTGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX942-TGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s @@ -122,27 +122,27 @@ define amdgpu_kernel void @global_singlethread_unordered_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_unordered_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_unordered_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_unordered_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_unordered_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_unordered_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -303,27 +303,27 @@ define amdgpu_kernel void @global_singlethread_monotonic_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_monotonic_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_monotonic_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_monotonic_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_monotonic_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_monotonic_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -484,27 +484,27 @@ define amdgpu_kernel void @global_singlethread_acquire_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_acquire_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_acquire_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_acquire_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_acquire_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_acquire_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -665,27 +665,27 @@ define amdgpu_kernel void @global_singlethread_seq_cst_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_seq_cst_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_seq_cst_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_seq_cst_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_seq_cst_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_seq_cst_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -825,25 +825,25 @@ define amdgpu_kernel void @global_singlethread_unordered_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_unordered_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_unordered_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_unordered_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_unordered_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_unordered_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -978,25 +978,25 @@ define amdgpu_kernel void @global_singlethread_monotonic_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_monotonic_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_monotonic_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_monotonic_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_monotonic_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_monotonic_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1131,25 +1131,25 @@ define amdgpu_kernel void @global_singlethread_release_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_release_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_release_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_release_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_release_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_release_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1284,25 +1284,25 @@ define amdgpu_kernel void @global_singlethread_seq_cst_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_seq_cst_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_seq_cst_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_seq_cst_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_seq_cst_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_seq_cst_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1435,25 +1435,25 @@ define amdgpu_kernel void @global_singlethread_monotonic_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_monotonic_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_monotonic_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_monotonic_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_monotonic_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_monotonic_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1586,25 +1586,25 @@ define amdgpu_kernel void @global_singlethread_acquire_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_acquire_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_acquire_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_acquire_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_acquire_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_acquire_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1737,25 +1737,25 @@ define amdgpu_kernel void @global_singlethread_release_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_release_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_release_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_release_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_release_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_release_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1888,25 +1888,25 @@ define amdgpu_kernel void @global_singlethread_acq_rel_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_acq_rel_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_acq_rel_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_acq_rel_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_acq_rel_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_acq_rel_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2039,25 +2039,25 @@ define amdgpu_kernel void @global_singlethread_seq_cst_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_seq_cst_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_seq_cst_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_seq_cst_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_seq_cst_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_seq_cst_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2206,29 +2206,29 @@ define amdgpu_kernel void @global_singlethread_acquire_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_acquire_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_acquire_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_acquire_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_acquire_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_acquire_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2386,29 +2386,29 @@ define amdgpu_kernel void @global_singlethread_acq_rel_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_acq_rel_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_acq_rel_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_acq_rel_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_acq_rel_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_acq_rel_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2566,29 +2566,29 @@ define amdgpu_kernel void @global_singlethread_seq_cst_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_seq_cst_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_seq_cst_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_seq_cst_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_seq_cst_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_seq_cst_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2770,33 +2770,33 @@ define amdgpu_kernel void @global_singlethread_monotonic_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_monotonic_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_monotonic_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_monotonic_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_monotonic_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_monotonic_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2986,33 +2986,33 @@ define amdgpu_kernel void @global_singlethread_acquire_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_acquire_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_acquire_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_acquire_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_acquire_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_acquire_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3202,33 +3202,33 @@ define amdgpu_kernel void @global_singlethread_release_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_release_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_release_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_release_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_release_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_release_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3418,33 +3418,33 @@ define amdgpu_kernel void @global_singlethread_acq_rel_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_acq_rel_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_acq_rel_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_acq_rel_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_acq_rel_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_acq_rel_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3634,33 +3634,33 @@ define amdgpu_kernel void @global_singlethread_seq_cst_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_seq_cst_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_seq_cst_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_seq_cst_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_seq_cst_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_seq_cst_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3850,33 +3850,33 @@ define amdgpu_kernel void @global_singlethread_monotonic_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_monotonic_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_monotonic_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_monotonic_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_monotonic_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_monotonic_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4066,33 +4066,33 @@ define amdgpu_kernel void @global_singlethread_acquire_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_acquire_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_acquire_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_acquire_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_acquire_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_acquire_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4282,33 +4282,33 @@ define amdgpu_kernel void @global_singlethread_release_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_release_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_release_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_release_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_release_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_release_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4498,33 +4498,33 @@ define amdgpu_kernel void @global_singlethread_acq_rel_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_acq_rel_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_acq_rel_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_acq_rel_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_acq_rel_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_acq_rel_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4714,33 +4714,33 @@ define amdgpu_kernel void @global_singlethread_seq_cst_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_seq_cst_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_seq_cst_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_seq_cst_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_seq_cst_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_seq_cst_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4930,33 +4930,33 @@ define amdgpu_kernel void @global_singlethread_monotonic_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_monotonic_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_monotonic_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_monotonic_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_monotonic_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_monotonic_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5146,33 +5146,33 @@ define amdgpu_kernel void @global_singlethread_acquire_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_acquire_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_acquire_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_acquire_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_acquire_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_acquire_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5362,33 +5362,33 @@ define amdgpu_kernel void @global_singlethread_release_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_release_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_release_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_release_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_release_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_release_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5578,33 +5578,33 @@ define amdgpu_kernel void @global_singlethread_acq_rel_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_acq_rel_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_acq_rel_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_acq_rel_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_acq_rel_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_acq_rel_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5794,33 +5794,33 @@ define amdgpu_kernel void @global_singlethread_seq_cst_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_seq_cst_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_seq_cst_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_seq_cst_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_seq_cst_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_seq_cst_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6028,37 +6028,37 @@ define amdgpu_kernel void @global_singlethread_monotonic_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_monotonic_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_monotonic_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_monotonic_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_monotonic_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_monotonic_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6276,37 +6276,37 @@ define amdgpu_kernel void @global_singlethread_acquire_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_acquire_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_acquire_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_acquire_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_acquire_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_acquire_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6524,37 +6524,37 @@ define amdgpu_kernel void @global_singlethread_release_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_release_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_release_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_release_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_release_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_release_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6772,37 +6772,37 @@ define amdgpu_kernel void @global_singlethread_acq_rel_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_acq_rel_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_acq_rel_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_acq_rel_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_acq_rel_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_acq_rel_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7020,37 +7020,37 @@ define amdgpu_kernel void @global_singlethread_seq_cst_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_seq_cst_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_seq_cst_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_seq_cst_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_seq_cst_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_seq_cst_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7268,37 +7268,37 @@ define amdgpu_kernel void @global_singlethread_monotonic_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_monotonic_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_monotonic_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_monotonic_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_monotonic_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_monotonic_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7516,37 +7516,37 @@ define amdgpu_kernel void @global_singlethread_acquire_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_acquire_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_acquire_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_acquire_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_acquire_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_acquire_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7764,37 +7764,37 @@ define amdgpu_kernel void @global_singlethread_release_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_release_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_release_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_release_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_release_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_release_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8012,37 +8012,37 @@ define amdgpu_kernel void @global_singlethread_acq_rel_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_acq_rel_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_acq_rel_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_acq_rel_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_acq_rel_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_acq_rel_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8260,37 +8260,37 @@ define amdgpu_kernel void @global_singlethread_seq_cst_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_seq_cst_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_seq_cst_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_seq_cst_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_seq_cst_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_seq_cst_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8508,37 +8508,37 @@ define amdgpu_kernel void @global_singlethread_monotonic_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_monotonic_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_monotonic_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_monotonic_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_monotonic_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_monotonic_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8756,37 +8756,37 @@ define amdgpu_kernel void @global_singlethread_acquire_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_acquire_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_acquire_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_acquire_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_acquire_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_acquire_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9004,37 +9004,37 @@ define amdgpu_kernel void @global_singlethread_release_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_release_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_release_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_release_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_release_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_release_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9252,37 +9252,37 @@ define amdgpu_kernel void @global_singlethread_acq_rel_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_acq_rel_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9500,37 +9500,37 @@ define amdgpu_kernel void @global_singlethread_seq_cst_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_seq_cst_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9713,27 +9713,27 @@ define amdgpu_kernel void @global_singlethread_one_as_unordered_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_unordered_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_unordered_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_unordered_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_unordered_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_unordered_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9894,27 +9894,27 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_monotonic_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_monotonic_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_monotonic_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_monotonic_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_monotonic_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10075,27 +10075,27 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_acquire_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_acquire_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_acquire_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_acquire_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_acquire_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10256,27 +10256,27 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_seq_cst_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_seq_cst_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_seq_cst_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_seq_cst_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_seq_cst_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10416,25 +10416,25 @@ define amdgpu_kernel void @global_singlethread_one_as_unordered_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_unordered_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_unordered_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_unordered_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_unordered_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_unordered_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10569,25 +10569,25 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_monotonic_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_monotonic_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_monotonic_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_monotonic_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_monotonic_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10722,25 +10722,25 @@ define amdgpu_kernel void @global_singlethread_one_as_release_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_release_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_release_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_release_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_release_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_release_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10875,25 +10875,25 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_seq_cst_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_seq_cst_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_seq_cst_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_seq_cst_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_seq_cst_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11026,25 +11026,25 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_monotonic_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_monotonic_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_monotonic_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_monotonic_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_monotonic_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11177,25 +11177,25 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_acquire_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_acquire_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_acquire_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_acquire_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_acquire_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11328,25 +11328,25 @@ define amdgpu_kernel void @global_singlethread_one_as_release_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_release_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_release_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_release_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_release_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_release_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11479,25 +11479,25 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_acq_rel_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_acq_rel_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_acq_rel_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_acq_rel_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_acq_rel_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11630,25 +11630,25 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_seq_cst_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_seq_cst_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_seq_cst_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_seq_cst_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_seq_cst_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11797,29 +11797,29 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_acquire_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_acquire_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_acquire_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_acquire_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_acquire_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11977,29 +11977,29 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_acq_rel_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_acq_rel_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_acq_rel_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_acq_rel_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_acq_rel_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12157,29 +12157,29 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_seq_cst_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_seq_cst_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_seq_cst_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_seq_cst_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_seq_cst_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12361,33 +12361,33 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_monotonic_cmpxch ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_monotonic_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_monotonic_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_monotonic_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_monotonic_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_monotonic_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12577,33 +12577,33 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_acquire_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_acquire_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_acquire_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_acquire_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_acquire_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12793,33 +12793,33 @@ define amdgpu_kernel void @global_singlethread_one_as_release_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_release_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_release_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_release_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_release_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_release_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13009,33 +13009,33 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_acq_rel_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_acq_rel_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_acq_rel_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_acq_rel_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_acq_rel_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13225,33 +13225,33 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_seq_cst_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_seq_cst_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_seq_cst_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_seq_cst_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_seq_cst_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13441,33 +13441,33 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_monotonic_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_monotonic_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_monotonic_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_monotonic_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_monotonic_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13657,33 +13657,33 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_acquire_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_acquire_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_acquire_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_acquire_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_acquire_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13873,33 +13873,33 @@ define amdgpu_kernel void @global_singlethread_one_as_release_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_release_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_release_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_release_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_release_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_release_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14089,33 +14089,33 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_acq_rel_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_acq_rel_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_acq_rel_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_acq_rel_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_acq_rel_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14305,33 +14305,33 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_seq_cst_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_seq_cst_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_seq_cst_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_seq_cst_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_seq_cst_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14521,33 +14521,33 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_monotonic_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_monotonic_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_monotonic_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_monotonic_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_monotonic_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14737,33 +14737,33 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_acquire_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_acquire_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_acquire_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_acquire_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_acquire_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14953,33 +14953,33 @@ define amdgpu_kernel void @global_singlethread_one_as_release_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_release_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_release_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_release_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_release_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_release_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15169,33 +15169,33 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_acq_rel_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_acq_rel_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_acq_rel_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_acq_rel_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_acq_rel_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15385,33 +15385,33 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_seq_cst_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_seq_cst_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_seq_cst_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_seq_cst_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_seq_cst_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15619,37 +15619,37 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_monotonic_ret_cm ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_monotonic_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_monotonic_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_monotonic_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_monotonic_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_monotonic_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15867,37 +15867,37 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_monotonic_ret_cmpx ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_acquire_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_acquire_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_acquire_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_acquire_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_acquire_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16115,37 +16115,37 @@ define amdgpu_kernel void @global_singlethread_one_as_release_monotonic_ret_cmpx ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_release_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_release_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_release_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_release_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_release_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16363,37 +16363,37 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_monotonic_ret_cmpx ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16611,37 +16611,37 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_monotonic_ret_cmpx ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16859,37 +16859,37 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_acquire_ret_cmpx ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_monotonic_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_monotonic_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_monotonic_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_monotonic_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_monotonic_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17107,37 +17107,37 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_acquire_ret_cmpxch ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_acquire_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_acquire_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_acquire_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_acquire_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_acquire_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17355,37 +17355,37 @@ define amdgpu_kernel void @global_singlethread_one_as_release_acquire_ret_cmpxch ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_release_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_release_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_release_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_release_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_release_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17603,37 +17603,37 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_acquire_ret_cmpxch ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_acq_rel_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_acq_rel_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_acq_rel_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_acq_rel_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_acq_rel_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17851,37 +17851,37 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_acquire_ret_cmpxch ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_seq_cst_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_seq_cst_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_seq_cst_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_seq_cst_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_seq_cst_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -18099,37 +18099,37 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_seq_cst_ret_cmpx ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_monotonic_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_monotonic_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_monotonic_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_monotonic_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_monotonic_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -18347,37 +18347,37 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_seq_cst_ret_cmpxch ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_acquire_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_acquire_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_acquire_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_acquire_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_acquire_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -18595,37 +18595,37 @@ define amdgpu_kernel void @global_singlethread_one_as_release_seq_cst_ret_cmpxch ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_release_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_release_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_release_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_release_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_release_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -18843,37 +18843,37 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_seq_cst_ret_cmpxch ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -19091,37 +19091,37 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_seq_cst_ret_cmpxch ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll index 466276eea73be..69404247ccd6e 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll @@ -6,8 +6,8 @@ ; RUN: llc -mtriple=amdgcn-amd-amdpal -O0 -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A-NOTTGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX942-NOTTGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX942-TGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s @@ -122,27 +122,27 @@ define amdgpu_kernel void @global_system_unordered_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_unordered_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_unordered_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_unordered_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_unordered_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_unordered_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -303,27 +303,27 @@ define amdgpu_kernel void @global_system_monotonic_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_monotonic_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_monotonic_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_monotonic_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_monotonic_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_monotonic_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -494,29 +494,29 @@ define amdgpu_kernel void @global_system_acquire_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_acquire_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_acquire_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_acquire_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_acquire_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_acquire_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -698,29 +698,29 @@ define amdgpu_kernel void @global_system_seq_cst_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_seq_cst_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_seq_cst_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_seq_cst_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_seq_cst_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_seq_cst_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -880,25 +880,25 @@ define amdgpu_kernel void @global_system_unordered_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_unordered_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_unordered_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_unordered_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_unordered_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_unordered_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1033,25 +1033,25 @@ define amdgpu_kernel void @global_system_monotonic_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_monotonic_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_monotonic_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_monotonic_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_monotonic_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_monotonic_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1197,29 +1197,29 @@ define amdgpu_kernel void @global_system_release_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_release_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_release_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_release_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_release_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_release_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1379,29 +1379,29 @@ define amdgpu_kernel void @global_system_seq_cst_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_seq_cst_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_seq_cst_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_seq_cst_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_seq_cst_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_seq_cst_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1548,25 +1548,25 @@ define amdgpu_kernel void @global_system_monotonic_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_monotonic_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_monotonic_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_monotonic_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_monotonic_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_monotonic_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1716,29 +1716,29 @@ define amdgpu_kernel void @global_system_acquire_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_acquire_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_acquire_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_acquire_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_acquire_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_acquire_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1892,29 +1892,29 @@ define amdgpu_kernel void @global_system_release_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_release_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_release_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_release_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_release_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_release_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2089,33 +2089,33 @@ define amdgpu_kernel void @global_system_acq_rel_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_acq_rel_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_acq_rel_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_acq_rel_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_acq_rel_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_acq_rel_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2300,33 +2300,33 @@ define amdgpu_kernel void @global_system_seq_cst_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_seq_cst_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_seq_cst_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_seq_cst_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_seq_cst_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_seq_cst_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2509,31 +2509,31 @@ define amdgpu_kernel void @global_system_acquire_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_acquire_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_acquire_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_acquire_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_acquire_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_acquire_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2718,35 +2718,35 @@ define amdgpu_kernel void @global_system_acq_rel_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_acq_rel_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_acq_rel_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_acq_rel_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_acq_rel_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_acq_rel_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2949,35 +2949,35 @@ define amdgpu_kernel void @global_system_seq_cst_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_seq_cst_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_seq_cst_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_seq_cst_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_seq_cst_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_seq_cst_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3183,33 +3183,33 @@ define amdgpu_kernel void @global_system_monotonic_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_monotonic_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_monotonic_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_monotonic_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_monotonic_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_monotonic_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3416,37 +3416,37 @@ define amdgpu_kernel void @global_system_acquire_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_acquire_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_acquire_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_acquire_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_acquire_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_acquire_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3657,37 +3657,37 @@ define amdgpu_kernel void @global_system_release_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_release_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_release_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_release_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_release_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_release_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3919,41 +3919,41 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_acq_rel_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_acq_rel_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_acq_rel_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_acq_rel_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_acq_rel_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4195,41 +4195,41 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_seq_cst_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_seq_cst_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_seq_cst_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_seq_cst_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_seq_cst_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4460,37 +4460,37 @@ define amdgpu_kernel void @global_system_monotonic_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_monotonic_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_monotonic_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_monotonic_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_monotonic_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_monotonic_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4707,37 +4707,37 @@ define amdgpu_kernel void @global_system_acquire_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_acquire_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_acquire_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_acquire_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_acquire_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_acquire_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4965,41 +4965,41 @@ define amdgpu_kernel void @global_system_release_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_release_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_release_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_release_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_release_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_release_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5241,41 +5241,41 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_acq_rel_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_acq_rel_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_acq_rel_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_acq_rel_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_acq_rel_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5517,41 +5517,41 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_seq_cst_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_seq_cst_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_seq_cst_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_seq_cst_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_seq_cst_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5793,41 +5793,41 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_seq_cst_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_seq_cst_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_seq_cst_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_seq_cst_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_seq_cst_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6059,37 +6059,37 @@ define amdgpu_kernel void @global_system_monotonic_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_monotonic_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_monotonic_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_monotonic_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_monotonic_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_monotonic_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6318,39 +6318,39 @@ define amdgpu_kernel void @global_system_acquire_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_acquire_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_acquire_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_acquire_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_acquire_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_acquire_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6596,43 +6596,43 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6896,43 +6896,43 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7185,39 +7185,39 @@ define amdgpu_kernel void @global_system_monotonic_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_monotonic_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_monotonic_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_monotonic_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_monotonic_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_monotonic_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7456,39 +7456,39 @@ define amdgpu_kernel void @global_system_acquire_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_acquire_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_acquire_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_acquire_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_acquire_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_acquire_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7734,43 +7734,43 @@ define amdgpu_kernel void @global_system_release_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_release_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_release_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_release_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_release_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_release_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8034,43 +8034,43 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_acq_rel_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_acq_rel_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_acq_rel_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_acq_rel_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_acq_rel_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8334,43 +8334,43 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_seq_cst_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_seq_cst_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_seq_cst_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_seq_cst_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_seq_cst_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8634,43 +8634,43 @@ define amdgpu_kernel void @global_system_monotonic_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_monotonic_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_monotonic_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_monotonic_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_monotonic_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_monotonic_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8934,43 +8934,43 @@ define amdgpu_kernel void @global_system_acquire_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_acquire_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_acquire_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_acquire_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_acquire_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_acquire_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9230,43 +9230,43 @@ define amdgpu_kernel void @global_system_relese_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_relese_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_relese_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_relese_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_relese_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_relese_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9530,43 +9530,43 @@ define amdgpu_kernel void @global_system_acq_rel_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_acq_rel_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9830,43 +9830,43 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10073,27 +10073,27 @@ define amdgpu_kernel void @global_system_one_as_unordered_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_unordered_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_unordered_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_unordered_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_unordered_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_unordered_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10254,27 +10254,27 @@ define amdgpu_kernel void @global_system_one_as_monotonic_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_monotonic_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_monotonic_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_monotonic_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10445,29 +10445,29 @@ define amdgpu_kernel void @global_system_one_as_acquire_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acquire_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_acquire_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_acquire_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_acquire_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_acquire_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10649,29 +10649,29 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_seq_cst_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_seq_cst_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_seq_cst_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10831,25 +10831,25 @@ define amdgpu_kernel void @global_system_one_as_unordered_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_unordered_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_unordered_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_unordered_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_unordered_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_unordered_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10984,25 +10984,25 @@ define amdgpu_kernel void @global_system_one_as_monotonic_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_monotonic_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_monotonic_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_monotonic_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11148,29 +11148,29 @@ define amdgpu_kernel void @global_system_one_as_release_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_release_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_release_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_release_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_release_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_release_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11330,29 +11330,29 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_seq_cst_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_seq_cst_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_seq_cst_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11499,25 +11499,25 @@ define amdgpu_kernel void @global_system_one_as_monotonic_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_monotonic_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_monotonic_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_monotonic_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11667,29 +11667,29 @@ define amdgpu_kernel void @global_system_one_as_acquire_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acquire_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_acquire_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_acquire_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_acquire_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_acquire_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11843,29 +11843,29 @@ define amdgpu_kernel void @global_system_one_as_release_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_release_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_release_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_release_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_release_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_release_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12040,33 +12040,33 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_acq_rel_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_acq_rel_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_acq_rel_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12251,33 +12251,33 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_seq_cst_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_seq_cst_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_seq_cst_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12460,31 +12460,31 @@ define amdgpu_kernel void @global_system_one_as_acquire_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acquire_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_acquire_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_acquire_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_acquire_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_acquire_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12669,35 +12669,35 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_acq_rel_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_acq_rel_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_acq_rel_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12900,35 +12900,35 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_seq_cst_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_seq_cst_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_seq_cst_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13134,33 +13134,33 @@ define amdgpu_kernel void @global_system_one_as_monotonic_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13367,37 +13367,37 @@ define amdgpu_kernel void @global_system_one_as_acquire_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acquire_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_acquire_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_acquire_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_acquire_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_acquire_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13608,37 +13608,37 @@ define amdgpu_kernel void @global_system_one_as_release_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_release_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_release_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_release_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_release_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_release_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13870,41 +13870,41 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14146,41 +14146,41 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14411,37 +14411,37 @@ define amdgpu_kernel void @global_system_one_as_monotonic_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_monotonic_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_monotonic_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_monotonic_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14658,37 +14658,37 @@ define amdgpu_kernel void @global_system_one_as_acquire_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acquire_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_acquire_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_acquire_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_acquire_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_acquire_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14916,41 +14916,41 @@ define amdgpu_kernel void @global_system_one_as_release_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_release_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_release_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_release_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_release_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_release_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15192,41 +15192,41 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15468,41 +15468,41 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15744,41 +15744,41 @@ define amdgpu_kernel void @global_system_one_as_monotonic_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16020,41 +16020,41 @@ define amdgpu_kernel void @global_system_one_as_acquire_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acquire_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_acquire_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_acquire_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_acquire_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_acquire_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16296,41 +16296,41 @@ define amdgpu_kernel void @global_system_one_as_release_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_release_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_release_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_release_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_release_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_release_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16572,41 +16572,41 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16848,41 +16848,41 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17114,37 +17114,37 @@ define amdgpu_kernel void @global_system_one_as_monotonic_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_monotonic_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_monotonic_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_monotonic_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17373,39 +17373,39 @@ define amdgpu_kernel void @global_system_one_as_acquire_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17640,41 +17640,41 @@ define amdgpu_kernel void @global_system_one_as_release_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_release_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_release_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_release_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_release_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_release_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17928,43 +17928,43 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -18228,43 +18228,43 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -18517,39 +18517,39 @@ define amdgpu_kernel void @global_system_one_as_monotonic_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_monotonic_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_monotonic_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_monotonic_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -18788,39 +18788,39 @@ define amdgpu_kernel void @global_system_one_as_acquire_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -19066,43 +19066,43 @@ define amdgpu_kernel void @global_system_one_as_release_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_release_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_release_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_release_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_release_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_release_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -19366,43 +19366,43 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -19666,43 +19666,43 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -19966,43 +19966,43 @@ define amdgpu_kernel void @global_system_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_monotonic_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_monotonic_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_monotonic_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -20266,43 +20266,43 @@ define amdgpu_kernel void @global_system_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acquire_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_acquire_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_acquire_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_acquire_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_acquire_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -20562,43 +20562,43 @@ define amdgpu_kernel void @global_system_one_as_release_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_release_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_release_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_release_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_release_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_release_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -20862,43 +20862,43 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_acq_rel_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -21162,43 +21162,43 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 sc1 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll index 08682786f7b25..4b6c99282dc13 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll @@ -6,8 +6,8 @@ ; RUN: llc -mtriple=amdgcn-amd-amdpal -O0 -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A-NOTTGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX942-NOTTGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX942-TGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s @@ -122,27 +122,27 @@ define amdgpu_kernel void @global_wavefront_unordered_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_unordered_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_unordered_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_unordered_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_unordered_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_unordered_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -303,27 +303,27 @@ define amdgpu_kernel void @global_wavefront_monotonic_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_monotonic_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_monotonic_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_monotonic_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_monotonic_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_monotonic_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -484,27 +484,27 @@ define amdgpu_kernel void @global_wavefront_acquire_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_acquire_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_acquire_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_acquire_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_acquire_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_acquire_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -665,27 +665,27 @@ define amdgpu_kernel void @global_wavefront_seq_cst_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_seq_cst_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_seq_cst_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_seq_cst_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -825,25 +825,25 @@ define amdgpu_kernel void @global_wavefront_unordered_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_unordered_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_unordered_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_unordered_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_unordered_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_unordered_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -978,25 +978,25 @@ define amdgpu_kernel void @global_wavefront_monotonic_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_monotonic_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_monotonic_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_monotonic_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_monotonic_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_monotonic_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1131,25 +1131,25 @@ define amdgpu_kernel void @global_wavefront_release_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_release_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_release_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_release_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_release_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_release_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1284,25 +1284,25 @@ define amdgpu_kernel void @global_wavefront_seq_cst_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_seq_cst_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_seq_cst_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_seq_cst_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1435,25 +1435,25 @@ define amdgpu_kernel void @global_wavefront_monotonic_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_monotonic_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_monotonic_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_monotonic_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_monotonic_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_monotonic_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1586,25 +1586,25 @@ define amdgpu_kernel void @global_wavefront_acquire_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_acquire_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_acquire_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_acquire_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_acquire_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_acquire_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1737,25 +1737,25 @@ define amdgpu_kernel void @global_wavefront_release_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_release_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_release_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_release_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_release_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_release_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1888,25 +1888,25 @@ define amdgpu_kernel void @global_wavefront_acq_rel_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_acq_rel_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_acq_rel_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_acq_rel_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_acq_rel_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_acq_rel_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2039,25 +2039,25 @@ define amdgpu_kernel void @global_wavefront_seq_cst_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_seq_cst_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_seq_cst_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_seq_cst_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2206,29 +2206,29 @@ define amdgpu_kernel void @global_wavefront_acquire_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_acquire_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_acquire_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_acquire_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_acquire_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_acquire_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2386,29 +2386,29 @@ define amdgpu_kernel void @global_wavefront_acq_rel_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_acq_rel_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_acq_rel_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_acq_rel_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_acq_rel_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_acq_rel_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2566,29 +2566,29 @@ define amdgpu_kernel void @global_wavefront_seq_cst_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_seq_cst_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_seq_cst_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_seq_cst_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2770,33 +2770,33 @@ define amdgpu_kernel void @global_wavefront_monotonic_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_monotonic_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_monotonic_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_monotonic_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_monotonic_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_monotonic_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2986,33 +2986,33 @@ define amdgpu_kernel void @global_wavefront_acquire_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_acquire_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_acquire_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_acquire_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_acquire_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_acquire_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3202,33 +3202,33 @@ define amdgpu_kernel void @global_wavefront_release_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_release_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_release_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_release_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_release_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_release_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3418,33 +3418,33 @@ define amdgpu_kernel void @global_wavefront_acq_rel_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_acq_rel_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_acq_rel_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_acq_rel_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_acq_rel_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_acq_rel_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3634,33 +3634,33 @@ define amdgpu_kernel void @global_wavefront_seq_cst_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_seq_cst_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_seq_cst_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_seq_cst_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3850,33 +3850,33 @@ define amdgpu_kernel void @global_wavefront_monotonic_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_monotonic_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_monotonic_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_monotonic_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_monotonic_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_monotonic_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4066,33 +4066,33 @@ define amdgpu_kernel void @global_wavefront_acquire_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_acquire_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_acquire_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_acquire_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_acquire_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_acquire_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4282,33 +4282,33 @@ define amdgpu_kernel void @global_wavefront_release_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_release_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_release_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_release_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_release_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_release_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4498,33 +4498,33 @@ define amdgpu_kernel void @global_wavefront_acq_rel_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_acq_rel_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_acq_rel_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_acq_rel_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_acq_rel_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_acq_rel_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4714,33 +4714,33 @@ define amdgpu_kernel void @global_wavefront_seq_cst_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_seq_cst_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_seq_cst_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_seq_cst_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4930,33 +4930,33 @@ define amdgpu_kernel void @global_wavefront_monotonic_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_monotonic_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_monotonic_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_monotonic_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_monotonic_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_monotonic_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5146,33 +5146,33 @@ define amdgpu_kernel void @global_wavefront_acquire_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_acquire_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_acquire_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_acquire_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_acquire_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_acquire_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5362,33 +5362,33 @@ define amdgpu_kernel void @global_wavefront_release_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_release_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_release_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_release_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_release_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_release_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5578,33 +5578,33 @@ define amdgpu_kernel void @global_wavefront_acq_rel_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_acq_rel_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_acq_rel_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_acq_rel_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_acq_rel_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_acq_rel_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5794,33 +5794,33 @@ define amdgpu_kernel void @global_wavefront_seq_cst_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_seq_cst_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_seq_cst_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_seq_cst_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6028,37 +6028,37 @@ define amdgpu_kernel void @global_wavefront_monotonic_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_monotonic_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_monotonic_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_monotonic_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_monotonic_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_monotonic_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6276,37 +6276,37 @@ define amdgpu_kernel void @global_wavefront_acquire_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_acquire_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_acquire_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_acquire_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_acquire_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_acquire_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6524,37 +6524,37 @@ define amdgpu_kernel void @global_wavefront_release_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_release_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_release_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_release_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_release_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_release_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6772,37 +6772,37 @@ define amdgpu_kernel void @global_wavefront_acq_rel_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_acq_rel_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_acq_rel_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_acq_rel_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_acq_rel_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_acq_rel_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7020,37 +7020,37 @@ define amdgpu_kernel void @global_wavefront_seq_cst_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_seq_cst_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_seq_cst_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_seq_cst_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7268,37 +7268,37 @@ define amdgpu_kernel void @global_wavefront_monotonic_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_monotonic_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_monotonic_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_monotonic_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_monotonic_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_monotonic_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7516,37 +7516,37 @@ define amdgpu_kernel void @global_wavefront_acquire_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_acquire_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_acquire_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_acquire_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_acquire_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_acquire_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7764,37 +7764,37 @@ define amdgpu_kernel void @global_wavefront_release_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_release_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_release_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_release_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_release_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_release_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8012,37 +8012,37 @@ define amdgpu_kernel void @global_wavefront_acq_rel_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_acq_rel_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_acq_rel_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_acq_rel_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_acq_rel_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_acq_rel_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8260,37 +8260,37 @@ define amdgpu_kernel void @global_wavefront_seq_cst_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_seq_cst_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_seq_cst_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_seq_cst_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8508,37 +8508,37 @@ define amdgpu_kernel void @global_wavefront_monotonic_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_monotonic_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_monotonic_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_monotonic_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_monotonic_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_monotonic_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8756,37 +8756,37 @@ define amdgpu_kernel void @global_wavefront_acquire_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_acquire_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_acquire_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_acquire_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_acquire_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_acquire_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9004,37 +9004,37 @@ define amdgpu_kernel void @global_wavefront_release_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_release_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_release_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_release_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_release_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_release_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9252,37 +9252,37 @@ define amdgpu_kernel void @global_wavefront_acq_rel_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_acq_rel_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9500,37 +9500,37 @@ define amdgpu_kernel void @global_wavefront_seq_cst_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_seq_cst_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9713,27 +9713,27 @@ define amdgpu_kernel void @global_wavefront_one_as_unordered_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_unordered_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_unordered_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_unordered_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_unordered_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_unordered_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9894,27 +9894,27 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_monotonic_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_monotonic_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_monotonic_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_monotonic_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_monotonic_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10075,27 +10075,27 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_acquire_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_acquire_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_acquire_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_acquire_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_acquire_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10256,27 +10256,27 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_seq_cst_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10416,25 +10416,25 @@ define amdgpu_kernel void @global_wavefront_one_as_unordered_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_unordered_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_unordered_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_unordered_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_unordered_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_unordered_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10569,25 +10569,25 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_monotonic_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_monotonic_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_monotonic_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_monotonic_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_monotonic_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10722,25 +10722,25 @@ define amdgpu_kernel void @global_wavefront_one_as_release_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_release_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_release_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_release_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_release_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_release_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10875,25 +10875,25 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_seq_cst_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11026,25 +11026,25 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_monotonic_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_monotonic_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_monotonic_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_monotonic_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_monotonic_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11177,25 +11177,25 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_acquire_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_acquire_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_acquire_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_acquire_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_acquire_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11328,25 +11328,25 @@ define amdgpu_kernel void @global_wavefront_one_as_release_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_release_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_release_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_release_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_release_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_release_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11479,25 +11479,25 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_acq_rel_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_acq_rel_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_acq_rel_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_acq_rel_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_acq_rel_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11630,25 +11630,25 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_seq_cst_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11797,29 +11797,29 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_acquire_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_acquire_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_acquire_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_acquire_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_acquire_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11977,29 +11977,29 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_acq_rel_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_acq_rel_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_acq_rel_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_acq_rel_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_acq_rel_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12157,29 +12157,29 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_seq_cst_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12361,33 +12361,33 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_monotonic_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_monotonic_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_monotonic_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_monotonic_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_monotonic_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12577,33 +12577,33 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_acquire_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_acquire_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_acquire_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_acquire_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_acquire_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12793,33 +12793,33 @@ define amdgpu_kernel void @global_wavefront_one_as_release_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_release_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_release_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_release_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_release_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_release_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13009,33 +13009,33 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_acq_rel_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_acq_rel_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_acq_rel_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_acq_rel_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_acq_rel_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13225,33 +13225,33 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_seq_cst_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13441,33 +13441,33 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_monotonic_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_monotonic_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_monotonic_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_monotonic_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_monotonic_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13657,33 +13657,33 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_acquire_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_acquire_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_acquire_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_acquire_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_acquire_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13873,33 +13873,33 @@ define amdgpu_kernel void @global_wavefront_one_as_release_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_release_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_release_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_release_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_release_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_release_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14089,33 +14089,33 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_acq_rel_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_acq_rel_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_acq_rel_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_acq_rel_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_acq_rel_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14305,33 +14305,33 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_seq_cst_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14521,33 +14521,33 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_monotonic_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_monotonic_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_monotonic_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_monotonic_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_monotonic_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14737,33 +14737,33 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_acquire_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_acquire_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_acquire_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_acquire_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_acquire_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14953,33 +14953,33 @@ define amdgpu_kernel void @global_wavefront_one_as_release_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_release_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_release_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_release_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_release_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_release_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15169,33 +15169,33 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_acq_rel_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_acq_rel_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_acq_rel_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_acq_rel_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_acq_rel_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15385,33 +15385,33 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_seq_cst_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15619,37 +15619,37 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_monotonic_ret_cmpxc ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_monotonic_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_monotonic_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_monotonic_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_monotonic_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_monotonic_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15867,37 +15867,37 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_monotonic_ret_cmpxchg ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_acquire_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_acquire_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_acquire_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_acquire_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_acquire_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16115,37 +16115,37 @@ define amdgpu_kernel void @global_wavefront_one_as_release_monotonic_ret_cmpxchg ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_release_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_release_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_release_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_release_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_release_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16363,37 +16363,37 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16611,37 +16611,37 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16859,37 +16859,37 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_acquire_ret_cmpxchg ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_monotonic_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_monotonic_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_monotonic_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_monotonic_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_monotonic_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17107,37 +17107,37 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_acquire_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_acquire_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_acquire_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_acquire_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_acquire_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17355,37 +17355,37 @@ define amdgpu_kernel void @global_wavefront_one_as_release_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_release_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_release_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_release_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_release_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_release_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17603,37 +17603,37 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17851,37 +17851,37 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -18099,37 +18099,37 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -18347,37 +18347,37 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_acquire_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_acquire_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_acquire_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_acquire_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_acquire_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -18595,37 +18595,37 @@ define amdgpu_kernel void @global_wavefront_one_as_release_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_release_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_release_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_release_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_release_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_release_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -18843,37 +18843,37 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -19091,37 +19091,37 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll index 0edf543f33f48..46d65187cb1b2 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll @@ -6,8 +6,8 @@ ; RUN: llc -mtriple=amdgcn-amd-amdpal -O0 -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A-NOTTGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX942-NOTTGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX942-TGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s @@ -122,27 +122,27 @@ define amdgpu_kernel void @global_workgroup_unordered_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_unordered_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_unordered_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_unordered_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_unordered_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_unordered_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -303,27 +303,27 @@ define amdgpu_kernel void @global_workgroup_monotonic_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_monotonic_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_monotonic_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_monotonic_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_monotonic_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_monotonic_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -486,28 +486,28 @@ define amdgpu_kernel void @global_workgroup_acquire_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_acquire_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_acquire_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_acquire_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_acquire_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_acquire_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -676,28 +676,28 @@ define amdgpu_kernel void @global_workgroup_seq_cst_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_seq_cst_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_seq_cst_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_seq_cst_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_seq_cst_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_seq_cst_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -847,25 +847,25 @@ define amdgpu_kernel void @global_workgroup_unordered_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_unordered_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_unordered_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_unordered_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_unordered_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_unordered_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1000,25 +1000,25 @@ define amdgpu_kernel void @global_workgroup_monotonic_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_monotonic_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_monotonic_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_monotonic_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_monotonic_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_monotonic_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1161,27 +1161,27 @@ define amdgpu_kernel void @global_workgroup_release_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_release_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_release_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_release_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_release_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_release_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1332,27 +1332,27 @@ define amdgpu_kernel void @global_workgroup_seq_cst_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_seq_cst_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_seq_cst_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_seq_cst_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_seq_cst_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_seq_cst_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1493,25 +1493,25 @@ define amdgpu_kernel void @global_workgroup_monotonic_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_monotonic_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_monotonic_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_monotonic_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_monotonic_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_monotonic_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1648,27 +1648,27 @@ define amdgpu_kernel void @global_workgroup_acquire_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_acquire_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_acquire_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_acquire_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_acquire_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_acquire_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1813,27 +1813,27 @@ define amdgpu_kernel void @global_workgroup_release_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_release_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_release_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_release_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_release_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_release_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1986,29 +1986,29 @@ define amdgpu_kernel void @global_workgroup_acq_rel_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_acq_rel_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_acq_rel_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_acq_rel_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_acq_rel_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_acq_rel_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2165,29 +2165,29 @@ define amdgpu_kernel void @global_workgroup_seq_cst_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_seq_cst_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_seq_cst_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_seq_cst_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_seq_cst_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_seq_cst_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2350,30 +2350,30 @@ define amdgpu_kernel void @global_workgroup_acquire_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_acquire_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_acquire_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_acquire_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_acquire_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_acquire_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2543,32 +2543,32 @@ define amdgpu_kernel void @global_workgroup_acq_rel_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_acq_rel_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_acq_rel_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_acq_rel_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_acq_rel_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_acq_rel_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2748,32 +2748,32 @@ define amdgpu_kernel void @global_workgroup_seq_cst_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_seq_cst_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_seq_cst_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_seq_cst_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_seq_cst_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_seq_cst_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2967,33 +2967,33 @@ define amdgpu_kernel void @global_workgroup_monotonic_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_monotonic_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_monotonic_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_monotonic_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_monotonic_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_monotonic_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3187,35 +3187,35 @@ define amdgpu_kernel void @global_workgroup_acquire_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_acquire_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_acquire_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_acquire_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_acquire_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_acquire_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3417,35 +3417,35 @@ define amdgpu_kernel void @global_workgroup_release_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_release_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_release_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_release_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_release_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_release_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3655,37 +3655,37 @@ define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_acq_rel_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_acq_rel_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_acq_rel_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_acq_rel_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_acq_rel_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3899,37 +3899,37 @@ define amdgpu_kernel void @global_workgroup_seq_cst_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_seq_cst_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_seq_cst_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_seq_cst_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_seq_cst_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_seq_cst_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4135,35 +4135,35 @@ define amdgpu_kernel void @global_workgroup_monotonic_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_monotonic_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_monotonic_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_monotonic_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_monotonic_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_monotonic_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4361,35 +4361,35 @@ define amdgpu_kernel void @global_workgroup_acquire_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_acquire_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_acquire_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_acquire_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_acquire_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_acquire_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4595,37 +4595,37 @@ define amdgpu_kernel void @global_workgroup_release_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_release_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_release_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_release_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_release_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_release_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4839,37 +4839,37 @@ define amdgpu_kernel void @global_workgroup_acq_rel_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_acq_rel_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_acq_rel_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_acq_rel_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_acq_rel_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_acq_rel_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5083,37 +5083,37 @@ define amdgpu_kernel void @global_workgroup_seq_cst_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_seq_cst_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_seq_cst_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_seq_cst_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_seq_cst_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_seq_cst_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5327,37 +5327,37 @@ define amdgpu_kernel void @global_workgroup_monotonic_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_monotonic_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_monotonic_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_monotonic_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_monotonic_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_monotonic_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5571,37 +5571,37 @@ define amdgpu_kernel void @global_workgroup_acquire_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_acquire_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_acquire_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_acquire_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_acquire_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_acquire_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5815,37 +5815,37 @@ define amdgpu_kernel void @global_workgroup_release_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_release_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_release_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_release_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_release_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_release_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6059,37 +6059,37 @@ define amdgpu_kernel void @global_workgroup_acq_rel_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_acq_rel_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_acq_rel_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_acq_rel_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_acq_rel_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_acq_rel_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6303,37 +6303,37 @@ define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_seq_cst_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_seq_cst_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_seq_cst_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_seq_cst_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_seq_cst_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6553,37 +6553,37 @@ define amdgpu_kernel void @global_workgroup_monotonic_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_monotonic_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_monotonic_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_monotonic_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_monotonic_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_monotonic_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6803,38 +6803,38 @@ define amdgpu_kernel void @global_workgroup_acquire_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_acquire_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_acquire_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_acquire_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_acquire_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_acquire_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7062,39 +7062,39 @@ define amdgpu_kernel void @global_workgroup_release_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_release_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_release_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_release_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_release_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_release_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7330,40 +7330,40 @@ define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_acq_rel_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_acq_rel_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_acq_rel_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_acq_rel_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_acq_rel_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7603,40 +7603,40 @@ define amdgpu_kernel void @global_workgroup_seq_cst_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_seq_cst_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_seq_cst_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_seq_cst_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_seq_cst_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_seq_cst_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7868,38 +7868,38 @@ define amdgpu_kernel void @global_workgroup_monotonic_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_monotonic_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_monotonic_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_monotonic_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_monotonic_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_monotonic_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8123,38 +8123,38 @@ define amdgpu_kernel void @global_workgroup_acquire_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_acquire_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_acquire_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_acquire_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_acquire_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_acquire_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8384,40 +8384,40 @@ define amdgpu_kernel void @global_workgroup_release_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_release_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_release_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_release_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_release_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_release_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8657,40 +8657,40 @@ define amdgpu_kernel void @global_workgroup_acq_rel_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_acq_rel_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_acq_rel_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_acq_rel_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_acq_rel_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_acq_rel_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8930,40 +8930,40 @@ define amdgpu_kernel void @global_workgroup_seq_cst_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_seq_cst_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_seq_cst_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_seq_cst_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_seq_cst_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_seq_cst_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9203,40 +9203,40 @@ define amdgpu_kernel void @global_workgroup_monotonic_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_monotonic_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_monotonic_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_monotonic_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_monotonic_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_monotonic_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9476,40 +9476,40 @@ define amdgpu_kernel void @global_workgroup_acquire_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_acquire_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_acquire_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_acquire_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_acquire_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_acquire_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9747,40 +9747,40 @@ define amdgpu_kernel void @global_workgroup_release_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_release_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_release_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_release_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_release_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_release_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10020,40 +10020,40 @@ define amdgpu_kernel void @global_workgroup_acq_rel_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_acq_rel_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10293,40 +10293,40 @@ define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_seq_cst_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10521,27 +10521,27 @@ define amdgpu_kernel void @global_workgroup_one_as_unordered_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_unordered_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_unordered_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_unordered_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_unordered_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_unordered_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10702,27 +10702,27 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_monotonic_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_monotonic_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_monotonic_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_monotonic_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_monotonic_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10885,28 +10885,28 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_acquire_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_acquire_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_acquire_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_acquire_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_acquire_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11072,28 +11072,28 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_seq_cst_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_seq_cst_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_seq_cst_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_seq_cst_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_load_dword v1, v0, s[2:3] sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_seq_cst_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11242,25 +11242,25 @@ define amdgpu_kernel void @global_workgroup_one_as_unordered_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_unordered_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_unordered_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_unordered_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_unordered_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_unordered_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11395,25 +11395,25 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_monotonic_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_monotonic_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_monotonic_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_monotonic_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_monotonic_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11551,26 +11551,26 @@ define amdgpu_kernel void @global_workgroup_one_as_release_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_release_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_release_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_release_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_release_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_release_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11714,26 +11714,26 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_store( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_seq_cst_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_seq_cst_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_seq_cst_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_seq_cst_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_seq_cst_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11872,25 +11872,25 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_monotonic_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_monotonic_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_monotonic_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_monotonic_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_monotonic_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12027,27 +12027,27 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_acquire_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_acquire_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_acquire_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_acquire_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_acquire_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12187,26 +12187,26 @@ define amdgpu_kernel void @global_workgroup_one_as_release_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_release_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_release_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_release_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_release_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_release_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12352,28 +12352,28 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_acq_rel_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_acq_rel_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_acq_rel_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_acq_rel_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_acq_rel_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12523,28 +12523,28 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_seq_cst_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_seq_cst_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_seq_cst_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_seq_cst_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_seq_cst_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12705,30 +12705,30 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_acquire_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_acquire_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_acquire_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_acquire_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_acquire_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12893,31 +12893,31 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_acq_rel_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_acq_rel_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_acq_rel_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_acq_rel_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_acq_rel_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13090,31 +13090,31 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_seq_cst_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_seq_cst_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_seq_cst_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_seq_cst_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_seq_cst_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13306,33 +13306,33 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_monotonic_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_monotonic_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_monotonic_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_monotonic_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_monotonic_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13526,35 +13526,35 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_acquire_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_acquire_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_acquire_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_acquire_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_acquire_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13751,34 +13751,34 @@ define amdgpu_kernel void @global_workgroup_one_as_release_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_release_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_release_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_release_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_release_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_release_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13981,36 +13981,36 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_acq_rel_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_acq_rel_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_acq_rel_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_acq_rel_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_acq_rel_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14217,36 +14217,36 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_seq_cst_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_seq_cst_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_seq_cst_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_seq_cst_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_seq_cst_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14450,35 +14450,35 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_monotonic_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_monotonic_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_monotonic_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_monotonic_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_monotonic_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14676,35 +14676,35 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_acquire_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_acquire_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_acquire_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_acquire_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_acquire_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14905,36 +14905,36 @@ define amdgpu_kernel void @global_workgroup_one_as_release_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_release_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_release_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_release_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_release_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_release_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15141,36 +15141,36 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_acq_rel_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_acq_rel_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_acq_rel_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_acq_rel_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_acq_rel_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15377,36 +15377,36 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_seq_cst_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_seq_cst_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_seq_cst_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_seq_cst_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_seq_cst_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15613,36 +15613,36 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_monotonic_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_monotonic_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_monotonic_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_monotonic_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_monotonic_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15849,36 +15849,36 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_acquire_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_acquire_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_acquire_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_acquire_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_acquire_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16085,36 +16085,36 @@ define amdgpu_kernel void @global_workgroup_one_as_release_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_release_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_release_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_release_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_release_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_release_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16321,36 +16321,36 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_acq_rel_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_acq_rel_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_acq_rel_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_acq_rel_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_acq_rel_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16557,36 +16557,36 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_seq_cst_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_seq_cst_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_seq_cst_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_seq_cst_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_seq_cst_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16804,37 +16804,37 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_monotonic_ret_cmpxc ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_monotonic_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_monotonic_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_monotonic_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_monotonic_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_monotonic_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17054,38 +17054,38 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_monotonic_ret_cmpxchg ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_acquire_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_acquire_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_acquire_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_acquire_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_acquire_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17308,38 +17308,38 @@ define amdgpu_kernel void @global_workgroup_one_as_release_monotonic_ret_cmpxchg ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_release_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_release_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_release_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_release_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_release_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17568,39 +17568,39 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17833,39 +17833,39 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -18095,38 +18095,38 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_acquire_ret_cmpxchg ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_monotonic_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_monotonic_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_monotonic_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_monotonic_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_monotonic_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -18350,38 +18350,38 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_acquire_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_acquire_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_acquire_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_acquire_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_acquire_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -18606,39 +18606,39 @@ define amdgpu_kernel void @global_workgroup_one_as_release_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_release_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_release_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_release_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_release_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_release_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -18871,39 +18871,39 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_acq_rel_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_acq_rel_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_acq_rel_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_acq_rel_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_acq_rel_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -19136,39 +19136,39 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_seq_cst_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_seq_cst_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_seq_cst_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_seq_cst_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_seq_cst_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -19401,39 +19401,39 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -19666,39 +19666,39 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_acquire_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_acquire_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_acquire_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_acquire_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_acquire_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -19929,39 +19929,39 @@ define amdgpu_kernel void @global_workgroup_one_as_release_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_release_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_release_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_release_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_release_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_release_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -20194,39 +20194,39 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -20459,39 +20459,39 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: global_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: global_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: global_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NOTTGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: global_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll index 38b1e5407d3e6..0467c5047a0be 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll @@ -6,8 +6,8 @@ ; RUN: llc -mtriple=amdgcn-amd-amdpal -O0 -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A-NOTTGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX942-NOTTGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX942-TGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s @@ -106,29 +106,29 @@ define amdgpu_kernel void @local_agent_unordered_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_unordered_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_unordered_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_unordered_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_unordered_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_unordered_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -277,29 +277,29 @@ define amdgpu_kernel void @local_agent_monotonic_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_monotonic_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_monotonic_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_monotonic_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_monotonic_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_monotonic_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -450,30 +450,30 @@ define amdgpu_kernel void @local_agent_acquire_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_acquire_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_acquire_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_acquire_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_acquire_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_acquire_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -634,32 +634,32 @@ define amdgpu_kernel void @local_agent_seq_cst_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_seq_cst_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_seq_cst_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_seq_cst_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_seq_cst_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_seq_cst_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -800,25 +800,25 @@ define amdgpu_kernel void @local_agent_unordered_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_unordered_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_unordered_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_unordered_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_unordered_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_unordered_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -940,25 +940,25 @@ define amdgpu_kernel void @local_agent_monotonic_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_monotonic_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_monotonic_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_monotonic_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_monotonic_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_monotonic_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1088,27 +1088,27 @@ define amdgpu_kernel void @local_agent_release_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_release_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_release_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_release_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_release_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_release_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1246,27 +1246,27 @@ define amdgpu_kernel void @local_agent_seq_cst_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_seq_cst_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_seq_cst_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_seq_cst_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_seq_cst_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_seq_cst_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1396,25 +1396,25 @@ define amdgpu_kernel void @local_agent_monotonic_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_monotonic_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_monotonic_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_monotonic_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_monotonic_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_monotonic_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1544,27 +1544,27 @@ define amdgpu_kernel void @local_agent_acquire_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_acquire_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_acquire_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_acquire_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_acquire_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_acquire_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1700,27 +1700,27 @@ define amdgpu_kernel void @local_agent_release_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_release_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_release_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_release_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_release_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_release_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1866,29 +1866,29 @@ define amdgpu_kernel void @local_agent_acq_rel_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_acq_rel_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_acq_rel_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_acq_rel_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_acq_rel_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_acq_rel_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2040,29 +2040,29 @@ define amdgpu_kernel void @local_agent_seq_cst_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_seq_cst_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_seq_cst_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_seq_cst_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_seq_cst_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_seq_cst_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2224,32 +2224,32 @@ define amdgpu_kernel void @local_agent_acquire_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_acquire_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_acquire_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_acquire_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_acquire_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_acquire_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2420,34 +2420,34 @@ define amdgpu_kernel void @local_agent_acq_rel_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_acq_rel_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_acq_rel_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_acq_rel_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_acq_rel_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_acq_rel_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2626,34 +2626,34 @@ define amdgpu_kernel void @local_agent_seq_cst_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_seq_cst_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_seq_cst_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_seq_cst_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_seq_cst_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_seq_cst_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2812,29 +2812,29 @@ define amdgpu_kernel void @local_agent_monotonic_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_monotonic_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_monotonic_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_monotonic_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_monotonic_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_monotonic_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2987,31 +2987,31 @@ define amdgpu_kernel void @local_agent_acquire_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_acquire_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_acquire_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_acquire_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_acquire_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_acquire_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3170,31 +3170,31 @@ define amdgpu_kernel void @local_agent_release_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_release_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_release_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_release_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_release_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_release_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3363,33 +3363,33 @@ define amdgpu_kernel void @local_agent_acq_rel_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_acq_rel_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_acq_rel_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_acq_rel_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_acq_rel_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_acq_rel_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3564,33 +3564,33 @@ define amdgpu_kernel void @local_agent_seq_cst_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_seq_cst_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_seq_cst_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_seq_cst_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_seq_cst_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_seq_cst_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3757,31 +3757,31 @@ define amdgpu_kernel void @local_agent_monotonic_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_monotonic_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_monotonic_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_monotonic_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_monotonic_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_monotonic_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3940,31 +3940,31 @@ define amdgpu_kernel void @local_agent_acquire_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_acquire_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_acquire_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_acquire_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_acquire_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_acquire_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4131,33 +4131,33 @@ define amdgpu_kernel void @local_agent_release_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_release_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_release_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_release_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_release_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_release_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4332,33 +4332,33 @@ define amdgpu_kernel void @local_agent_acq_rel_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_acq_rel_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_acq_rel_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_acq_rel_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_acq_rel_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_acq_rel_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4533,33 +4533,33 @@ define amdgpu_kernel void @local_agent_seq_cst_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_seq_cst_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_seq_cst_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_seq_cst_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_seq_cst_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_seq_cst_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4734,33 +4734,33 @@ define amdgpu_kernel void @local_agent_monotonic_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_monotonic_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_monotonic_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_monotonic_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_monotonic_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_monotonic_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4935,33 +4935,33 @@ define amdgpu_kernel void @local_agent_acquire_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_acquire_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_acquire_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_acquire_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_acquire_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_acquire_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5136,33 +5136,33 @@ define amdgpu_kernel void @local_agent_release_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_release_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_release_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_release_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_release_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_release_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5337,33 +5337,33 @@ define amdgpu_kernel void @local_agent_acq_rel_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_acq_rel_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_acq_rel_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_acq_rel_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_acq_rel_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_acq_rel_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5538,33 +5538,33 @@ define amdgpu_kernel void @local_agent_seq_cst_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_seq_cst_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_seq_cst_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_seq_cst_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_seq_cst_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_seq_cst_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5747,35 +5747,35 @@ define amdgpu_kernel void @local_agent_monotonic_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_monotonic_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_monotonic_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_monotonic_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_monotonic_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_monotonic_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5960,36 +5960,36 @@ define amdgpu_kernel void @local_agent_acquire_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_acquire_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_acquire_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_acquire_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_acquire_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_acquire_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6182,37 +6182,37 @@ define amdgpu_kernel void @local_agent_release_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_release_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_release_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_release_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_release_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_release_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6413,38 +6413,38 @@ define amdgpu_kernel void @local_agent_acq_rel_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_acq_rel_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_acq_rel_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_acq_rel_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_acq_rel_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_acq_rel_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6647,38 +6647,38 @@ define amdgpu_kernel void @local_agent_seq_cst_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_seq_cst_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_seq_cst_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_seq_cst_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_seq_cst_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_seq_cst_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6873,36 +6873,36 @@ define amdgpu_kernel void @local_agent_monotonic_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_monotonic_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_monotonic_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_monotonic_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_monotonic_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_monotonic_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7089,36 +7089,36 @@ define amdgpu_kernel void @local_agent_acquire_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_acquire_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_acquire_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_acquire_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_acquire_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_acquire_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7313,38 +7313,38 @@ define amdgpu_kernel void @local_agent_release_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_release_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_release_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_release_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_release_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_release_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7547,38 +7547,38 @@ define amdgpu_kernel void @local_agent_acq_rel_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_acq_rel_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_acq_rel_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_acq_rel_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_acq_rel_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_acq_rel_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7781,38 +7781,38 @@ define amdgpu_kernel void @local_agent_seq_cst_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_seq_cst_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_seq_cst_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_seq_cst_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_seq_cst_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_seq_cst_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8015,38 +8015,38 @@ define amdgpu_kernel void @local_agent_monotonic_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_monotonic_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_monotonic_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_monotonic_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_monotonic_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_monotonic_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8249,38 +8249,38 @@ define amdgpu_kernel void @local_agent_acquire_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_acquire_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_acquire_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_acquire_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_acquire_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_acquire_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8483,38 +8483,38 @@ define amdgpu_kernel void @local_agent_release_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_release_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_release_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_release_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_release_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_release_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8717,38 +8717,38 @@ define amdgpu_kernel void @local_agent_acq_rel_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_acq_rel_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8951,38 +8951,38 @@ define amdgpu_kernel void @local_agent_seq_cst_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_seq_cst_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9155,29 +9155,29 @@ define amdgpu_kernel void @local_agent_one_as_unordered_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_unordered_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_unordered_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_unordered_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_unordered_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_unordered_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9326,29 +9326,29 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_monotonic_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_monotonic_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_monotonic_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_monotonic_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_monotonic_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9497,29 +9497,29 @@ define amdgpu_kernel void @local_agent_one_as_acquire_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_acquire_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_acquire_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_acquire_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_acquire_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_acquire_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9668,29 +9668,29 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_seq_cst_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_seq_cst_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_seq_cst_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9821,25 +9821,25 @@ define amdgpu_kernel void @local_agent_one_as_unordered_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_unordered_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_unordered_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_unordered_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_unordered_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_unordered_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9961,25 +9961,25 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_monotonic_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_monotonic_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_monotonic_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_monotonic_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_monotonic_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10101,25 +10101,25 @@ define amdgpu_kernel void @local_agent_one_as_release_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_release_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_release_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_release_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_release_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_release_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10241,25 +10241,25 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_seq_cst_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_seq_cst_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_seq_cst_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10381,25 +10381,25 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_monotonic_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_monotonic_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_monotonic_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_monotonic_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_monotonic_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10521,25 +10521,25 @@ define amdgpu_kernel void @local_agent_one_as_acquire_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_acquire_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_acquire_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_acquire_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_acquire_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_acquire_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10661,25 +10661,25 @@ define amdgpu_kernel void @local_agent_one_as_release_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_release_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_release_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_release_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_release_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_release_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10801,25 +10801,25 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_acq_rel_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_acq_rel_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_acq_rel_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_acq_rel_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_acq_rel_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10941,25 +10941,25 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_seq_cst_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_seq_cst_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_seq_cst_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11105,31 +11105,31 @@ define amdgpu_kernel void @local_agent_one_as_acquire_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_acquire_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_acquire_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_acquire_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_acquire_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_acquire_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11288,31 +11288,31 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_acq_rel_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_acq_rel_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_acq_rel_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_acq_rel_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_acq_rel_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11471,31 +11471,31 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_seq_cst_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_seq_cst_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_seq_cst_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11644,29 +11644,29 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_monotonic_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_monotonic_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_monotonic_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_monotonic_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_monotonic_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11811,29 +11811,29 @@ define amdgpu_kernel void @local_agent_one_as_acquire_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_acquire_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_acquire_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_acquire_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_acquire_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_acquire_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11978,29 +11978,29 @@ define amdgpu_kernel void @local_agent_one_as_release_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_release_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_release_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_release_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_release_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_release_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12145,29 +12145,29 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_acq_rel_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_acq_rel_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_acq_rel_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_acq_rel_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_acq_rel_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12312,29 +12312,29 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_seq_cst_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_seq_cst_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_seq_cst_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12479,29 +12479,29 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_monotonic_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_monotonic_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_monotonic_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_monotonic_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_monotonic_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12646,29 +12646,29 @@ define amdgpu_kernel void @local_agent_one_as_acquire_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_acquire_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_acquire_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_acquire_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_acquire_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_acquire_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12813,29 +12813,29 @@ define amdgpu_kernel void @local_agent_one_as_release_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_release_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_release_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_release_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_release_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_release_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12980,29 +12980,29 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_acq_rel_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_acq_rel_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_acq_rel_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_acq_rel_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_acq_rel_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13147,29 +13147,29 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_seq_cst_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_seq_cst_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_seq_cst_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13314,29 +13314,29 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_monotonic_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_monotonic_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_monotonic_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_monotonic_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_monotonic_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13481,29 +13481,29 @@ define amdgpu_kernel void @local_agent_one_as_acquire_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_acquire_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_acquire_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_acquire_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_acquire_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_acquire_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13648,29 +13648,29 @@ define amdgpu_kernel void @local_agent_one_as_release_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_release_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_release_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_release_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_release_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_release_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13815,29 +13815,29 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_acq_rel_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_acq_rel_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_acq_rel_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_acq_rel_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_acq_rel_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13982,29 +13982,29 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_seq_cst_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_seq_cst_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_seq_cst_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14173,35 +14173,35 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_monotonic_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_monotonic_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_monotonic_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_monotonic_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_monotonic_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14384,35 +14384,35 @@ define amdgpu_kernel void @local_agent_one_as_acquire_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_acquire_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_acquire_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_acquire_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_acquire_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_acquire_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14595,35 +14595,35 @@ define amdgpu_kernel void @local_agent_one_as_release_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_release_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_release_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_release_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_release_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_release_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14806,35 +14806,35 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_acq_rel_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_acq_rel_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_acq_rel_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_acq_rel_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_acq_rel_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15017,35 +15017,35 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_seq_cst_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_seq_cst_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_seq_cst_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15228,35 +15228,35 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_monotonic_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_monotonic_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_monotonic_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_monotonic_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_monotonic_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15439,35 +15439,35 @@ define amdgpu_kernel void @local_agent_one_as_acquire_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_acquire_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_acquire_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_acquire_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_acquire_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_acquire_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15650,35 +15650,35 @@ define amdgpu_kernel void @local_agent_one_as_release_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_release_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_release_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_release_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_release_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_release_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15861,35 +15861,35 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_acq_rel_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_acq_rel_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_acq_rel_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_acq_rel_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_acq_rel_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16072,35 +16072,35 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_seq_cst_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_seq_cst_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_seq_cst_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16283,35 +16283,35 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_monotonic_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_monotonic_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_monotonic_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_monotonic_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_monotonic_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16494,35 +16494,35 @@ define amdgpu_kernel void @local_agent_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_acquire_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_acquire_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_acquire_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_acquire_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_acquire_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16705,35 +16705,35 @@ define amdgpu_kernel void @local_agent_one_as_release_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_release_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_release_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_release_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_release_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_release_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16916,35 +16916,35 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17127,35 +17127,35 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_agent_one_as_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_agent_one_as_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_agent_one_as_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_agent_one_as_seq_cst_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll index e9be38d6d17a3..04b0f00fe77b5 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll @@ -6,8 +6,8 @@ ; RUN: llc -mtriple=amdgcn-amd-amdpal -O0 -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A-NOTTGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX942-NOTTGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX942-TGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s @@ -119,29 +119,29 @@ define amdgpu_kernel void @local_nontemporal_load_0( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_nontemporal_load_0: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_nontemporal_load_0: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_nontemporal_load_0: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_nontemporal_load_0: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_nontemporal_load_0: ; GFX11-WGP: ; %bb.0: ; %entry @@ -323,39 +323,39 @@ define amdgpu_kernel void @local_nontemporal_load_1( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_nontemporal_load_1: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_mov_b32 s2, 0x3ff -; GFX940-NOTTGSPLIT-NEXT: v_and_b32_e64 v1, v1, s2 -; GFX940-NOTTGSPLIT-NEXT: s_mov_b32 s2, 2 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_lshl_add_u32 v1, v1, s2, v2 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_nontemporal_load_1: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_mov_b32 s2, 0x3ff -; GFX940-TGSPLIT-NEXT: v_and_b32_e64 v1, v1, s2 -; GFX940-TGSPLIT-NEXT: s_mov_b32 s2, 2 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_lshl_add_u32 v1, v1, s2, v2 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_nontemporal_load_1: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_mov_b32 s2, 0x3ff +; GFX942-NOTTGSPLIT-NEXT: v_and_b32_e64 v1, v1, s2 +; GFX942-NOTTGSPLIT-NEXT: s_mov_b32 s2, 2 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_lshl_add_u32 v1, v1, s2, v2 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_nontemporal_load_1: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_mov_b32 s2, 0x3ff +; GFX942-TGSPLIT-NEXT: v_and_b32_e64 v1, v1, s2 +; GFX942-TGSPLIT-NEXT: s_mov_b32 s2, 2 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_lshl_add_u32 v1, v1, s2, v2 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_nontemporal_load_1: ; GFX11-WGP: ; %bb.0: ; %entry @@ -520,29 +520,29 @@ define amdgpu_kernel void @local_nontemporal_store_0( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_nontemporal_store_0: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[2:3], 0x0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_nontemporal_store_0: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[2:3], 0x0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_nontemporal_store_0: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[2:3], 0x0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_nontemporal_store_0: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[2:3], 0x0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_nontemporal_store_0: ; GFX11-WGP: ; %bb.0: ; %entry @@ -703,37 +703,37 @@ define amdgpu_kernel void @local_nontemporal_store_1( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_nontemporal_store_1: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[0:1], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_mov_b32 s1, 0x3ff -; GFX940-NOTTGSPLIT-NEXT: v_and_b32_e64 v0, v0, s1 -; GFX940-NOTTGSPLIT-NEXT: s_mov_b32 s1, 2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_lshl_add_u32 v0, v0, s1, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_nontemporal_store_1: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[0:1], 0x0 -; GFX940-TGSPLIT-NEXT: s_mov_b32 s1, 0x3ff -; GFX940-TGSPLIT-NEXT: v_and_b32_e64 v0, v0, s1 -; GFX940-TGSPLIT-NEXT: s_mov_b32 s1, 2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_lshl_add_u32 v0, v0, s1, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_nontemporal_store_1: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[0:1], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_mov_b32 s1, 0x3ff +; GFX942-NOTTGSPLIT-NEXT: v_and_b32_e64 v0, v0, s1 +; GFX942-NOTTGSPLIT-NEXT: s_mov_b32 s1, 2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_lshl_add_u32 v0, v0, s1, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_nontemporal_store_1: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[0:1], 0x0 +; GFX942-TGSPLIT-NEXT: s_mov_b32 s1, 0x3ff +; GFX942-TGSPLIT-NEXT: v_and_b32_e64 v0, v0, s1 +; GFX942-TGSPLIT-NEXT: s_mov_b32 s1, 2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_lshl_add_u32 v0, v0, s1, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_nontemporal_store_1: ; GFX11-WGP: ; %bb.0: ; %entry @@ -911,29 +911,29 @@ define amdgpu_kernel void @local_nontemporal_volatile_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_nontemporal_volatile_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_nontemporal_volatile_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_nontemporal_volatile_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_nontemporal_volatile_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_nontemporal_volatile_load: ; GFX11-WGP: ; %bb.0: ; %entry diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll index 31f36a42a2eda..f84d451f8ecb0 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll @@ -6,8 +6,8 @@ ; RUN: llc -mtriple=amdgcn-amd-amdpal -O0 -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A-NOTTGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX942-NOTTGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX942-TGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s @@ -106,29 +106,29 @@ define amdgpu_kernel void @local_singlethread_unordered_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_unordered_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_unordered_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_unordered_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_unordered_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_unordered_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -277,29 +277,29 @@ define amdgpu_kernel void @local_singlethread_monotonic_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_monotonic_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_monotonic_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_monotonic_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_monotonic_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_monotonic_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -448,29 +448,29 @@ define amdgpu_kernel void @local_singlethread_acquire_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_acquire_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_acquire_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_acquire_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_acquire_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_acquire_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -619,29 +619,29 @@ define amdgpu_kernel void @local_singlethread_seq_cst_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_seq_cst_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_seq_cst_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_seq_cst_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_seq_cst_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_seq_cst_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -772,25 +772,25 @@ define amdgpu_kernel void @local_singlethread_unordered_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_unordered_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_unordered_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_unordered_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_unordered_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_unordered_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -912,25 +912,25 @@ define amdgpu_kernel void @local_singlethread_monotonic_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_monotonic_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_monotonic_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_monotonic_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_monotonic_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_monotonic_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1052,25 +1052,25 @@ define amdgpu_kernel void @local_singlethread_release_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_release_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_release_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_release_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_release_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_release_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1192,25 +1192,25 @@ define amdgpu_kernel void @local_singlethread_seq_cst_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_seq_cst_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_seq_cst_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_seq_cst_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_seq_cst_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_seq_cst_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1332,25 +1332,25 @@ define amdgpu_kernel void @local_singlethread_monotonic_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_monotonic_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_monotonic_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_monotonic_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_monotonic_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_monotonic_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1472,25 +1472,25 @@ define amdgpu_kernel void @local_singlethread_acquire_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_acquire_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_acquire_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_acquire_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_acquire_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_acquire_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1612,25 +1612,25 @@ define amdgpu_kernel void @local_singlethread_release_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_release_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_release_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_release_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_release_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_release_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1752,25 +1752,25 @@ define amdgpu_kernel void @local_singlethread_acq_rel_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_acq_rel_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_acq_rel_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_acq_rel_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_acq_rel_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_acq_rel_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1892,25 +1892,25 @@ define amdgpu_kernel void @local_singlethread_seq_cst_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_seq_cst_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_seq_cst_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_seq_cst_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_seq_cst_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_seq_cst_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2056,31 +2056,31 @@ define amdgpu_kernel void @local_singlethread_acquire_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_acquire_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_acquire_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_acquire_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_acquire_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_acquire_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2239,31 +2239,31 @@ define amdgpu_kernel void @local_singlethread_acq_rel_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_acq_rel_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_acq_rel_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_acq_rel_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_acq_rel_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_acq_rel_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2422,31 +2422,31 @@ define amdgpu_kernel void @local_singlethread_seq_cst_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_seq_cst_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_seq_cst_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_seq_cst_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_seq_cst_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_seq_cst_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2595,29 +2595,29 @@ define amdgpu_kernel void @local_singlethread_monotonic_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_monotonic_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_monotonic_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_monotonic_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_monotonic_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_monotonic_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2762,29 +2762,29 @@ define amdgpu_kernel void @local_singlethread_acquire_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_acquire_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_acquire_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_acquire_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_acquire_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_acquire_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2929,29 +2929,29 @@ define amdgpu_kernel void @local_singlethread_release_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_release_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_release_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_release_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_release_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_release_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3096,29 +3096,29 @@ define amdgpu_kernel void @local_singlethread_acq_rel_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_acq_rel_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_acq_rel_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_acq_rel_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_acq_rel_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_acq_rel_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3263,29 +3263,29 @@ define amdgpu_kernel void @local_singlethread_seq_cst_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_seq_cst_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_seq_cst_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_seq_cst_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_seq_cst_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_seq_cst_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3430,29 +3430,29 @@ define amdgpu_kernel void @local_singlethread_monotonic_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_monotonic_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_monotonic_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_monotonic_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_monotonic_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_monotonic_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3597,29 +3597,29 @@ define amdgpu_kernel void @local_singlethread_acquire_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_acquire_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_acquire_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_acquire_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_acquire_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_acquire_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3764,29 +3764,29 @@ define amdgpu_kernel void @local_singlethread_release_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_release_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_release_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_release_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_release_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_release_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3931,29 +3931,29 @@ define amdgpu_kernel void @local_singlethread_acq_rel_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_acq_rel_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_acq_rel_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_acq_rel_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_acq_rel_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_acq_rel_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4098,29 +4098,29 @@ define amdgpu_kernel void @local_singlethread_seq_cst_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_seq_cst_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_seq_cst_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_seq_cst_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_seq_cst_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_seq_cst_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4265,29 +4265,29 @@ define amdgpu_kernel void @local_singlethread_monotonic_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_monotonic_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_monotonic_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_monotonic_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_monotonic_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_monotonic_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4432,29 +4432,29 @@ define amdgpu_kernel void @local_singlethread_acquire_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_acquire_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_acquire_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_acquire_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_acquire_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_acquire_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4599,29 +4599,29 @@ define amdgpu_kernel void @local_singlethread_release_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_release_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_release_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_release_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_release_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_release_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4766,29 +4766,29 @@ define amdgpu_kernel void @local_singlethread_acq_rel_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_acq_rel_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_acq_rel_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_acq_rel_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_acq_rel_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_acq_rel_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4933,29 +4933,29 @@ define amdgpu_kernel void @local_singlethread_seq_cst_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_seq_cst_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_seq_cst_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_seq_cst_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_seq_cst_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_seq_cst_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5124,35 +5124,35 @@ define amdgpu_kernel void @local_singlethread_monotonic_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_monotonic_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_monotonic_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_monotonic_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_monotonic_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_monotonic_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5335,35 +5335,35 @@ define amdgpu_kernel void @local_singlethread_acquire_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_acquire_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_acquire_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_acquire_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_acquire_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_acquire_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5546,35 +5546,35 @@ define amdgpu_kernel void @local_singlethread_release_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_release_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_release_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_release_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_release_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_release_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5757,35 +5757,35 @@ define amdgpu_kernel void @local_singlethread_acq_rel_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_acq_rel_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_acq_rel_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_acq_rel_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_acq_rel_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_acq_rel_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5968,35 +5968,35 @@ define amdgpu_kernel void @local_singlethread_seq_cst_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_seq_cst_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_seq_cst_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_seq_cst_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_seq_cst_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_seq_cst_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6179,35 +6179,35 @@ define amdgpu_kernel void @local_singlethread_monotonic_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_monotonic_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_monotonic_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_monotonic_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_monotonic_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_monotonic_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6390,35 +6390,35 @@ define amdgpu_kernel void @local_singlethread_acquire_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_acquire_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_acquire_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_acquire_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_acquire_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_acquire_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6601,35 +6601,35 @@ define amdgpu_kernel void @local_singlethread_release_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_release_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_release_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_release_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_release_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_release_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6812,35 +6812,35 @@ define amdgpu_kernel void @local_singlethread_acq_rel_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_acq_rel_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_acq_rel_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_acq_rel_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_acq_rel_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_acq_rel_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7023,35 +7023,35 @@ define amdgpu_kernel void @local_singlethread_seq_cst_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_seq_cst_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_seq_cst_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_seq_cst_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_seq_cst_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_seq_cst_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7234,35 +7234,35 @@ define amdgpu_kernel void @local_singlethread_monotonic_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_monotonic_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_monotonic_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_monotonic_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_monotonic_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_monotonic_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7445,35 +7445,35 @@ define amdgpu_kernel void @local_singlethread_acquire_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_acquire_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_acquire_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_acquire_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_acquire_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_acquire_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7656,35 +7656,35 @@ define amdgpu_kernel void @local_singlethread_release_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_release_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_release_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_release_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_release_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_release_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7867,35 +7867,35 @@ define amdgpu_kernel void @local_singlethread_acq_rel_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_acq_rel_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8078,35 +8078,35 @@ define amdgpu_kernel void @local_singlethread_seq_cst_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_seq_cst_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8269,29 +8269,29 @@ define amdgpu_kernel void @local_singlethread_one_as_unordered_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_unordered_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_unordered_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_unordered_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_unordered_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_unordered_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8440,29 +8440,29 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_monotonic_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_monotonic_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_monotonic_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_monotonic_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_monotonic_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8611,29 +8611,29 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_acquire_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_acquire_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_acquire_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_acquire_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_acquire_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8782,29 +8782,29 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_seq_cst_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_seq_cst_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_seq_cst_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_seq_cst_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_seq_cst_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8935,25 +8935,25 @@ define amdgpu_kernel void @local_singlethread_one_as_unordered_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_unordered_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_unordered_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_unordered_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_unordered_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_unordered_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9075,25 +9075,25 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_monotonic_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_monotonic_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_monotonic_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_monotonic_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_monotonic_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9215,25 +9215,25 @@ define amdgpu_kernel void @local_singlethread_one_as_release_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_release_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_release_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_release_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_release_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_release_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9355,25 +9355,25 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_seq_cst_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_seq_cst_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_seq_cst_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_seq_cst_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_seq_cst_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9495,25 +9495,25 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_monotonic_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_monotonic_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_monotonic_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_monotonic_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_monotonic_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9635,25 +9635,25 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_acquire_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_acquire_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_acquire_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_acquire_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_acquire_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9775,25 +9775,25 @@ define amdgpu_kernel void @local_singlethread_one_as_release_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_release_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_release_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_release_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_release_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_release_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9915,25 +9915,25 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_acq_rel_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_acq_rel_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_acq_rel_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_acq_rel_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_acq_rel_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10055,25 +10055,25 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_seq_cst_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_seq_cst_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_seq_cst_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_seq_cst_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_seq_cst_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10219,31 +10219,31 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_acquire_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_acquire_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_acquire_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_acquire_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_acquire_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10402,31 +10402,31 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_acq_rel_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_acq_rel_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_acq_rel_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_acq_rel_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_acq_rel_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10585,31 +10585,31 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_seq_cst_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_seq_cst_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_seq_cst_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_seq_cst_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_seq_cst_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10758,29 +10758,29 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_monotonic_cmpxchg ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_monotonic_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_monotonic_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_monotonic_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_monotonic_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_monotonic_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10925,29 +10925,29 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_acquire_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_acquire_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_acquire_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_acquire_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_acquire_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11092,29 +11092,29 @@ define amdgpu_kernel void @local_singlethread_one_as_release_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_release_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_release_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_release_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_release_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_release_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11259,29 +11259,29 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_acq_rel_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_acq_rel_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_acq_rel_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_acq_rel_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_acq_rel_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11426,29 +11426,29 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_seq_cst_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_seq_cst_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_seq_cst_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_seq_cst_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_seq_cst_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11593,29 +11593,29 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_monotonic_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_monotonic_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_monotonic_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_monotonic_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_monotonic_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11760,29 +11760,29 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_acquire_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_acquire_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_acquire_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_acquire_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_acquire_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11927,29 +11927,29 @@ define amdgpu_kernel void @local_singlethread_one_as_release_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_release_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_release_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_release_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_release_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_release_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12094,29 +12094,29 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_acq_rel_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_acq_rel_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_acq_rel_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_acq_rel_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_acq_rel_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12261,29 +12261,29 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_seq_cst_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_seq_cst_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_seq_cst_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_seq_cst_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_seq_cst_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12428,29 +12428,29 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_monotonic_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_monotonic_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_monotonic_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_monotonic_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_monotonic_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12595,29 +12595,29 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_acquire_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_acquire_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_acquire_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_acquire_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_acquire_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12762,29 +12762,29 @@ define amdgpu_kernel void @local_singlethread_one_as_release_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_release_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_release_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_release_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_release_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_release_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12929,29 +12929,29 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_acq_rel_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_acq_rel_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_acq_rel_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_acq_rel_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_acq_rel_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13096,29 +13096,29 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_seq_cst_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_seq_cst_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_seq_cst_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_seq_cst_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_seq_cst_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13287,35 +13287,35 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_monotonic_ret_cmp ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_monotonic_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_monotonic_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_monotonic_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_monotonic_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_monotonic_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13498,35 +13498,35 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_monotonic_ret_cmpxc ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_acquire_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_acquire_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_acquire_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_acquire_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_acquire_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13709,35 +13709,35 @@ define amdgpu_kernel void @local_singlethread_one_as_release_monotonic_ret_cmpxc ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_release_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_release_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_release_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_release_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_release_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13920,35 +13920,35 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_monotonic_ret_cmpxc ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14131,35 +14131,35 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_monotonic_ret_cmpxc ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14342,35 +14342,35 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_acquire_ret_cmpxc ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_monotonic_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_monotonic_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_monotonic_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_monotonic_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_monotonic_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14553,35 +14553,35 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_acquire_ret_cmpxchg ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_acquire_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_acquire_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_acquire_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_acquire_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_acquire_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14764,35 +14764,35 @@ define amdgpu_kernel void @local_singlethread_one_as_release_acquire_ret_cmpxchg ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_release_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_release_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_release_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_release_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_release_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14975,35 +14975,35 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_acquire_ret_cmpxchg ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_acq_rel_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_acq_rel_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_acq_rel_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_acq_rel_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_acq_rel_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15186,35 +15186,35 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_acquire_ret_cmpxchg ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_seq_cst_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_seq_cst_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_seq_cst_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_seq_cst_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_seq_cst_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15397,35 +15397,35 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_seq_cst_ret_cmpxc ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_monotonic_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_monotonic_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_monotonic_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_monotonic_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_monotonic_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15608,35 +15608,35 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_seq_cst_ret_cmpxchg ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_acquire_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_acquire_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_acquire_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_acquire_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_acquire_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15819,35 +15819,35 @@ define amdgpu_kernel void @local_singlethread_one_as_release_seq_cst_ret_cmpxchg ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_release_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_release_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_release_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_release_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_release_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16030,35 +16030,35 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16241,35 +16241,35 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll index f2e3e7bf41768..74a297241d851 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll @@ -6,8 +6,8 @@ ; RUN: llc -mtriple=amdgcn-amd-amdpal -O0 -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A-NOTTGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX942-NOTTGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX942-TGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s @@ -106,29 +106,29 @@ define amdgpu_kernel void @local_system_unordered_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_unordered_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_unordered_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_unordered_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_unordered_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_unordered_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -277,29 +277,29 @@ define amdgpu_kernel void @local_system_monotonic_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_monotonic_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_monotonic_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_monotonic_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_monotonic_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_monotonic_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -450,30 +450,30 @@ define amdgpu_kernel void @local_system_acquire_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_acquire_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_acquire_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_acquire_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_acquire_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_acquire_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -634,32 +634,32 @@ define amdgpu_kernel void @local_system_seq_cst_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_seq_cst_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_seq_cst_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_seq_cst_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_seq_cst_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_seq_cst_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -800,25 +800,25 @@ define amdgpu_kernel void @local_system_unordered_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_unordered_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_unordered_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_unordered_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_unordered_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_unordered_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -940,25 +940,25 @@ define amdgpu_kernel void @local_system_monotonic_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_monotonic_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_monotonic_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_monotonic_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_monotonic_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_monotonic_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1088,27 +1088,27 @@ define amdgpu_kernel void @local_system_release_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_release_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_release_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_release_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_release_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_release_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1246,27 +1246,27 @@ define amdgpu_kernel void @local_system_seq_cst_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_seq_cst_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_seq_cst_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_seq_cst_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_seq_cst_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_seq_cst_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1396,25 +1396,25 @@ define amdgpu_kernel void @local_system_monotonic_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_monotonic_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_monotonic_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_monotonic_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_monotonic_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_monotonic_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1544,27 +1544,27 @@ define amdgpu_kernel void @local_system_acquire_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_acquire_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_acquire_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_acquire_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_acquire_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_acquire_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1700,27 +1700,27 @@ define amdgpu_kernel void @local_system_release_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_release_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_release_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_release_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_release_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_release_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1866,29 +1866,29 @@ define amdgpu_kernel void @local_system_acq_rel_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_acq_rel_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_acq_rel_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_acq_rel_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_acq_rel_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_acq_rel_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2040,29 +2040,29 @@ define amdgpu_kernel void @local_system_seq_cst_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_seq_cst_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_seq_cst_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_seq_cst_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_seq_cst_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_seq_cst_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2224,32 +2224,32 @@ define amdgpu_kernel void @local_system_acquire_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_acquire_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_acquire_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_acquire_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_acquire_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_acquire_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2420,34 +2420,34 @@ define amdgpu_kernel void @local_system_acq_rel_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_acq_rel_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_acq_rel_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_acq_rel_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_acq_rel_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_acq_rel_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2626,34 +2626,34 @@ define amdgpu_kernel void @local_system_seq_cst_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_seq_cst_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_seq_cst_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_seq_cst_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_seq_cst_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_seq_cst_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2812,29 +2812,29 @@ define amdgpu_kernel void @local_system_monotonic_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_monotonic_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_monotonic_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_monotonic_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_monotonic_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_monotonic_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2987,31 +2987,31 @@ define amdgpu_kernel void @local_system_acquire_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_acquire_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_acquire_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_acquire_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_acquire_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_acquire_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3170,31 +3170,31 @@ define amdgpu_kernel void @local_system_release_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_release_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_release_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_release_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_release_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_release_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3363,33 +3363,33 @@ define amdgpu_kernel void @local_system_acq_rel_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_acq_rel_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_acq_rel_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_acq_rel_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_acq_rel_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_acq_rel_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3564,33 +3564,33 @@ define amdgpu_kernel void @local_system_seq_cst_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_seq_cst_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_seq_cst_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_seq_cst_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_seq_cst_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_seq_cst_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3757,31 +3757,31 @@ define amdgpu_kernel void @local_system_monotonic_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_monotonic_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_monotonic_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_monotonic_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_monotonic_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_monotonic_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3940,31 +3940,31 @@ define amdgpu_kernel void @local_system_acquire_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_acquire_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_acquire_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_acquire_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_acquire_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_acquire_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4131,33 +4131,33 @@ define amdgpu_kernel void @local_system_release_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_release_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_release_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_release_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_release_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_release_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4332,33 +4332,33 @@ define amdgpu_kernel void @local_system_acq_rel_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_acq_rel_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_acq_rel_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_acq_rel_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_acq_rel_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_acq_rel_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4533,33 +4533,33 @@ define amdgpu_kernel void @local_system_seq_cst_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_seq_cst_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_seq_cst_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_seq_cst_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_seq_cst_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_seq_cst_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4734,33 +4734,33 @@ define amdgpu_kernel void @local_system_monotonic_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_monotonic_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_monotonic_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_monotonic_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_monotonic_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_monotonic_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4935,33 +4935,33 @@ define amdgpu_kernel void @local_system_acquire_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_acquire_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_acquire_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_acquire_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_acquire_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_acquire_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5136,33 +5136,33 @@ define amdgpu_kernel void @local_system_release_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_release_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_release_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_release_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_release_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_release_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5337,33 +5337,33 @@ define amdgpu_kernel void @local_system_acq_rel_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_acq_rel_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_acq_rel_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_acq_rel_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_acq_rel_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_acq_rel_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5538,33 +5538,33 @@ define amdgpu_kernel void @local_system_seq_cst_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_seq_cst_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_seq_cst_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_seq_cst_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_seq_cst_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_seq_cst_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5747,35 +5747,35 @@ define amdgpu_kernel void @local_system_monotonic_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_monotonic_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_monotonic_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_monotonic_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_monotonic_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_monotonic_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5960,36 +5960,36 @@ define amdgpu_kernel void @local_system_acquire_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_acquire_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_acquire_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_acquire_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_acquire_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_acquire_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6182,37 +6182,37 @@ define amdgpu_kernel void @local_system_release_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_release_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_release_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_release_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_release_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_release_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6413,38 +6413,38 @@ define amdgpu_kernel void @local_system_acq_rel_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_acq_rel_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_acq_rel_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_acq_rel_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_acq_rel_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_acq_rel_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6647,38 +6647,38 @@ define amdgpu_kernel void @local_system_seq_cst_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_seq_cst_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_seq_cst_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_seq_cst_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_seq_cst_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_seq_cst_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6873,36 +6873,36 @@ define amdgpu_kernel void @local_system_monotonic_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_monotonic_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_monotonic_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_monotonic_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_monotonic_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_monotonic_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7089,36 +7089,36 @@ define amdgpu_kernel void @local_system_acquire_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_acquire_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_acquire_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_acquire_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_acquire_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_acquire_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7313,38 +7313,38 @@ define amdgpu_kernel void @local_system_release_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_release_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_release_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_release_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_release_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_release_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7547,38 +7547,38 @@ define amdgpu_kernel void @local_system_acq_rel_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_acq_rel_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_acq_rel_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_acq_rel_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_acq_rel_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_acq_rel_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7781,38 +7781,38 @@ define amdgpu_kernel void @local_system_seq_cst_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_seq_cst_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_seq_cst_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_seq_cst_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_seq_cst_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_seq_cst_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8015,38 +8015,38 @@ define amdgpu_kernel void @local_system_monotonic_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_monotonic_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_monotonic_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_monotonic_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_monotonic_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_monotonic_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8249,38 +8249,38 @@ define amdgpu_kernel void @local_system_acquire_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_acquire_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_acquire_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_acquire_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_acquire_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_acquire_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8483,38 +8483,38 @@ define amdgpu_kernel void @local_system_release_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_release_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_release_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_release_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_release_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_release_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8717,38 +8717,38 @@ define amdgpu_kernel void @local_system_acq_rel_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_acq_rel_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8951,38 +8951,38 @@ define amdgpu_kernel void @local_system_seq_cst_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_seq_cst_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9155,29 +9155,29 @@ define amdgpu_kernel void @local_system_one_as_unordered_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_unordered_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_unordered_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_unordered_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_unordered_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_unordered_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9326,29 +9326,29 @@ define amdgpu_kernel void @local_system_one_as_monotonic_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_monotonic_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_monotonic_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_monotonic_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_monotonic_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_monotonic_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9497,29 +9497,29 @@ define amdgpu_kernel void @local_system_one_as_acquire_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_acquire_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_acquire_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_acquire_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_acquire_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_acquire_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9668,29 +9668,29 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_seq_cst_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_seq_cst_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_seq_cst_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_seq_cst_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_seq_cst_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9821,25 +9821,25 @@ define amdgpu_kernel void @local_system_one_as_unordered_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_unordered_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_unordered_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_unordered_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_unordered_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_unordered_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9961,25 +9961,25 @@ define amdgpu_kernel void @local_system_one_as_monotonic_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_monotonic_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_monotonic_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_monotonic_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_monotonic_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_monotonic_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10101,25 +10101,25 @@ define amdgpu_kernel void @local_system_one_as_release_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_release_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_release_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_release_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_release_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_release_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10241,25 +10241,25 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_seq_cst_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_seq_cst_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_seq_cst_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_seq_cst_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_seq_cst_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10381,25 +10381,25 @@ define amdgpu_kernel void @local_system_one_as_monotonic_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_monotonic_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_monotonic_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_monotonic_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_monotonic_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_monotonic_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10521,25 +10521,25 @@ define amdgpu_kernel void @local_system_one_as_acquire_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_acquire_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_acquire_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_acquire_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_acquire_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_acquire_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10661,25 +10661,25 @@ define amdgpu_kernel void @local_system_one_as_release_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_release_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_release_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_release_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_release_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_release_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10801,25 +10801,25 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_acq_rel_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_acq_rel_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_acq_rel_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_acq_rel_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_acq_rel_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10941,25 +10941,25 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_seq_cst_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_seq_cst_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_seq_cst_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_seq_cst_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_seq_cst_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11105,31 +11105,31 @@ define amdgpu_kernel void @local_system_one_as_acquire_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_acquire_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_acquire_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_acquire_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_acquire_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_acquire_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11288,31 +11288,31 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_acq_rel_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_acq_rel_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_acq_rel_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_acq_rel_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_acq_rel_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11471,31 +11471,31 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_seq_cst_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_seq_cst_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_seq_cst_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_seq_cst_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_seq_cst_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11644,29 +11644,29 @@ define amdgpu_kernel void @local_system_one_as_monotonic_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_monotonic_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_monotonic_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_monotonic_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_monotonic_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_monotonic_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11811,29 +11811,29 @@ define amdgpu_kernel void @local_system_one_as_acquire_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_acquire_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_acquire_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_acquire_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_acquire_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_acquire_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11978,29 +11978,29 @@ define amdgpu_kernel void @local_system_one_as_release_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_release_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_release_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_release_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_release_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_release_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12145,29 +12145,29 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_acq_rel_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_acq_rel_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_acq_rel_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_acq_rel_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_acq_rel_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12312,29 +12312,29 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_seq_cst_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_seq_cst_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_seq_cst_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_seq_cst_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_seq_cst_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12479,29 +12479,29 @@ define amdgpu_kernel void @local_system_one_as_monotonic_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_monotonic_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_monotonic_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_monotonic_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_monotonic_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_monotonic_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12646,29 +12646,29 @@ define amdgpu_kernel void @local_system_one_as_acquire_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_acquire_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_acquire_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_acquire_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_acquire_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_acquire_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12813,29 +12813,29 @@ define amdgpu_kernel void @local_system_one_as_release_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_release_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_release_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_release_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_release_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_release_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12980,29 +12980,29 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_acq_rel_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_acq_rel_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_acq_rel_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_acq_rel_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_acq_rel_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13147,29 +13147,29 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_seq_cst_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_seq_cst_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_seq_cst_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_seq_cst_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_seq_cst_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13314,29 +13314,29 @@ define amdgpu_kernel void @local_system_one_as_monotonic_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_monotonic_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_monotonic_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_monotonic_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_monotonic_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_monotonic_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13481,29 +13481,29 @@ define amdgpu_kernel void @local_system_one_as_acquire_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_acquire_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_acquire_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_acquire_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_acquire_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_acquire_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13648,29 +13648,29 @@ define amdgpu_kernel void @local_system_one_as_release_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_release_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_release_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_release_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_release_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_release_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13815,29 +13815,29 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_acq_rel_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_acq_rel_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_acq_rel_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_acq_rel_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_acq_rel_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13982,29 +13982,29 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_seq_cst_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_seq_cst_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_seq_cst_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_seq_cst_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_seq_cst_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14173,35 +14173,35 @@ define amdgpu_kernel void @local_system_one_as_monotonic_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_monotonic_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_monotonic_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_monotonic_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_monotonic_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_monotonic_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14384,35 +14384,35 @@ define amdgpu_kernel void @local_system_one_as_acquire_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_acquire_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_acquire_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_acquire_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_acquire_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_acquire_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14595,35 +14595,35 @@ define amdgpu_kernel void @local_system_one_as_release_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_release_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_release_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_release_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_release_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_release_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14806,35 +14806,35 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_acq_rel_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_acq_rel_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_acq_rel_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_acq_rel_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_acq_rel_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15017,35 +15017,35 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_seq_cst_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_seq_cst_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_seq_cst_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_seq_cst_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_seq_cst_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15228,35 +15228,35 @@ define amdgpu_kernel void @local_system_one_as_monotonic_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_monotonic_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_monotonic_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_monotonic_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_monotonic_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_monotonic_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15439,35 +15439,35 @@ define amdgpu_kernel void @local_system_one_as_acquire_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_acquire_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_acquire_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_acquire_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_acquire_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_acquire_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15650,35 +15650,35 @@ define amdgpu_kernel void @local_system_one_as_release_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_release_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_release_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_release_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_release_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_release_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15861,35 +15861,35 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_acq_rel_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_acq_rel_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_acq_rel_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_acq_rel_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_acq_rel_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16072,35 +16072,35 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_seq_cst_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_seq_cst_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_seq_cst_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_seq_cst_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_seq_cst_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16283,35 +16283,35 @@ define amdgpu_kernel void @local_system_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_monotonic_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_monotonic_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_monotonic_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_monotonic_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_monotonic_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16494,35 +16494,35 @@ define amdgpu_kernel void @local_system_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_acquire_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_acquire_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_acquire_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_acquire_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_acquire_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16705,35 +16705,35 @@ define amdgpu_kernel void @local_system_one_as_release_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_release_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_release_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_release_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_release_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_release_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16916,35 +16916,35 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_acq_rel_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17127,35 +17127,35 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_system_one_as_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_system_one_as_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_system_one_as_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_system_one_as_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_system_one_as_seq_cst_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll index 056559834ca1c..b24622a48a16b 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll @@ -6,8 +6,8 @@ ; RUN: llc -mtriple=amdgcn-amd-amdpal -O0 -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A-NOTTGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX942-NOTTGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX942-TGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s @@ -106,29 +106,29 @@ define amdgpu_kernel void @local_wavefront_unordered_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_unordered_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_unordered_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_unordered_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_unordered_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_unordered_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -277,29 +277,29 @@ define amdgpu_kernel void @local_wavefront_monotonic_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_monotonic_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_monotonic_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_monotonic_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_monotonic_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_monotonic_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -448,29 +448,29 @@ define amdgpu_kernel void @local_wavefront_acquire_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_acquire_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_acquire_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_acquire_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_acquire_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_acquire_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -619,29 +619,29 @@ define amdgpu_kernel void @local_wavefront_seq_cst_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_seq_cst_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_seq_cst_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_seq_cst_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_seq_cst_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_seq_cst_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -772,25 +772,25 @@ define amdgpu_kernel void @local_wavefront_unordered_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_unordered_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_unordered_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_unordered_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_unordered_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_unordered_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -912,25 +912,25 @@ define amdgpu_kernel void @local_wavefront_monotonic_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_monotonic_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_monotonic_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_monotonic_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_monotonic_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_monotonic_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1052,25 +1052,25 @@ define amdgpu_kernel void @local_wavefront_release_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_release_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_release_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_release_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_release_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_release_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1192,25 +1192,25 @@ define amdgpu_kernel void @local_wavefront_seq_cst_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_seq_cst_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_seq_cst_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_seq_cst_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_seq_cst_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_seq_cst_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1332,25 +1332,25 @@ define amdgpu_kernel void @local_wavefront_monotonic_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_monotonic_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_monotonic_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_monotonic_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_monotonic_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_monotonic_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1472,25 +1472,25 @@ define amdgpu_kernel void @local_wavefront_acquire_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_acquire_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_acquire_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_acquire_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_acquire_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_acquire_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1612,25 +1612,25 @@ define amdgpu_kernel void @local_wavefront_release_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_release_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_release_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_release_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_release_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_release_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1752,25 +1752,25 @@ define amdgpu_kernel void @local_wavefront_acq_rel_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_acq_rel_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_acq_rel_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_acq_rel_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_acq_rel_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_acq_rel_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1892,25 +1892,25 @@ define amdgpu_kernel void @local_wavefront_seq_cst_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_seq_cst_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_seq_cst_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_seq_cst_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_seq_cst_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_seq_cst_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2056,31 +2056,31 @@ define amdgpu_kernel void @local_wavefront_acquire_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_acquire_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_acquire_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_acquire_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_acquire_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_acquire_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2239,31 +2239,31 @@ define amdgpu_kernel void @local_wavefront_acq_rel_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_acq_rel_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_acq_rel_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_acq_rel_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_acq_rel_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_acq_rel_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2422,31 +2422,31 @@ define amdgpu_kernel void @local_wavefront_seq_cst_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_seq_cst_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_seq_cst_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_seq_cst_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_seq_cst_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_seq_cst_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2595,29 +2595,29 @@ define amdgpu_kernel void @local_wavefront_monotonic_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_monotonic_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_monotonic_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_monotonic_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_monotonic_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_monotonic_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2762,29 +2762,29 @@ define amdgpu_kernel void @local_wavefront_acquire_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_acquire_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_acquire_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_acquire_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_acquire_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_acquire_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2929,29 +2929,29 @@ define amdgpu_kernel void @local_wavefront_release_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_release_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_release_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_release_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_release_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_release_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3096,29 +3096,29 @@ define amdgpu_kernel void @local_wavefront_acq_rel_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_acq_rel_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_acq_rel_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_acq_rel_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_acq_rel_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_acq_rel_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3263,29 +3263,29 @@ define amdgpu_kernel void @local_wavefront_seq_cst_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_seq_cst_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_seq_cst_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_seq_cst_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_seq_cst_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_seq_cst_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3430,29 +3430,29 @@ define amdgpu_kernel void @local_wavefront_monotonic_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_monotonic_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_monotonic_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_monotonic_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_monotonic_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_monotonic_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3597,29 +3597,29 @@ define amdgpu_kernel void @local_wavefront_acquire_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_acquire_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_acquire_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_acquire_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_acquire_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_acquire_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3764,29 +3764,29 @@ define amdgpu_kernel void @local_wavefront_release_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_release_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_release_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_release_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_release_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_release_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3931,29 +3931,29 @@ define amdgpu_kernel void @local_wavefront_acq_rel_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_acq_rel_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_acq_rel_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_acq_rel_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_acq_rel_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_acq_rel_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4098,29 +4098,29 @@ define amdgpu_kernel void @local_wavefront_seq_cst_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_seq_cst_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_seq_cst_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_seq_cst_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_seq_cst_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_seq_cst_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4265,29 +4265,29 @@ define amdgpu_kernel void @local_wavefront_monotonic_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_monotonic_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_monotonic_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_monotonic_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_monotonic_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_monotonic_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4432,29 +4432,29 @@ define amdgpu_kernel void @local_wavefront_acquire_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_acquire_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_acquire_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_acquire_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_acquire_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_acquire_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4599,29 +4599,29 @@ define amdgpu_kernel void @local_wavefront_release_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_release_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_release_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_release_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_release_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_release_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4766,29 +4766,29 @@ define amdgpu_kernel void @local_wavefront_acq_rel_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_acq_rel_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_acq_rel_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_acq_rel_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_acq_rel_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_acq_rel_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4933,29 +4933,29 @@ define amdgpu_kernel void @local_wavefront_seq_cst_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_seq_cst_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_seq_cst_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_seq_cst_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_seq_cst_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_seq_cst_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5124,35 +5124,35 @@ define amdgpu_kernel void @local_wavefront_monotonic_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_monotonic_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_monotonic_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_monotonic_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_monotonic_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_monotonic_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5335,35 +5335,35 @@ define amdgpu_kernel void @local_wavefront_acquire_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_acquire_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_acquire_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_acquire_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_acquire_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_acquire_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5546,35 +5546,35 @@ define amdgpu_kernel void @local_wavefront_release_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_release_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_release_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_release_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_release_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_release_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5757,35 +5757,35 @@ define amdgpu_kernel void @local_wavefront_acq_rel_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_acq_rel_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_acq_rel_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_acq_rel_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_acq_rel_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_acq_rel_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5968,35 +5968,35 @@ define amdgpu_kernel void @local_wavefront_seq_cst_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_seq_cst_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_seq_cst_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_seq_cst_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_seq_cst_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_seq_cst_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6179,35 +6179,35 @@ define amdgpu_kernel void @local_wavefront_monotonic_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_monotonic_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_monotonic_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_monotonic_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_monotonic_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_monotonic_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6390,35 +6390,35 @@ define amdgpu_kernel void @local_wavefront_acquire_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_acquire_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_acquire_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_acquire_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_acquire_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_acquire_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6601,35 +6601,35 @@ define amdgpu_kernel void @local_wavefront_release_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_release_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_release_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_release_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_release_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_release_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6812,35 +6812,35 @@ define amdgpu_kernel void @local_wavefront_acq_rel_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_acq_rel_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_acq_rel_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_acq_rel_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_acq_rel_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_acq_rel_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7023,35 +7023,35 @@ define amdgpu_kernel void @local_wavefront_seq_cst_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_seq_cst_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_seq_cst_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_seq_cst_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_seq_cst_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_seq_cst_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7234,35 +7234,35 @@ define amdgpu_kernel void @local_wavefront_monotonic_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_monotonic_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_monotonic_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_monotonic_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_monotonic_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_monotonic_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7445,35 +7445,35 @@ define amdgpu_kernel void @local_wavefront_acquire_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_acquire_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_acquire_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_acquire_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_acquire_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_acquire_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7656,35 +7656,35 @@ define amdgpu_kernel void @local_wavefront_release_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_release_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_release_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_release_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_release_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_release_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7867,35 +7867,35 @@ define amdgpu_kernel void @local_wavefront_acq_rel_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_acq_rel_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8078,35 +8078,35 @@ define amdgpu_kernel void @local_wavefront_seq_cst_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_seq_cst_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8269,29 +8269,29 @@ define amdgpu_kernel void @local_wavefront_one_as_unordered_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_unordered_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_unordered_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_unordered_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_unordered_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_unordered_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8440,29 +8440,29 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_monotonic_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_monotonic_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_monotonic_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_monotonic_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_monotonic_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8611,29 +8611,29 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_acquire_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_acquire_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_acquire_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_acquire_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_acquire_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8782,29 +8782,29 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_seq_cst_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_seq_cst_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_seq_cst_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_seq_cst_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_seq_cst_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8935,25 +8935,25 @@ define amdgpu_kernel void @local_wavefront_one_as_unordered_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_unordered_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_unordered_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_unordered_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_unordered_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_unordered_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9075,25 +9075,25 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_monotonic_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_monotonic_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_monotonic_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_monotonic_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_monotonic_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9215,25 +9215,25 @@ define amdgpu_kernel void @local_wavefront_one_as_release_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_release_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_release_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_release_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_release_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_release_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9355,25 +9355,25 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_seq_cst_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_seq_cst_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_seq_cst_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_seq_cst_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_seq_cst_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9495,25 +9495,25 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_monotonic_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_monotonic_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_monotonic_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_monotonic_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_monotonic_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9635,25 +9635,25 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_acquire_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_acquire_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_acquire_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_acquire_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_acquire_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9775,25 +9775,25 @@ define amdgpu_kernel void @local_wavefront_one_as_release_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_release_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_release_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_release_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_release_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_release_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9915,25 +9915,25 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_acq_rel_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_acq_rel_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_acq_rel_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_acq_rel_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_acq_rel_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10055,25 +10055,25 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_seq_cst_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_seq_cst_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_seq_cst_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_seq_cst_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_seq_cst_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10219,31 +10219,31 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_acquire_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_acquire_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_acquire_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_acquire_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_acquire_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10402,31 +10402,31 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_acq_rel_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_acq_rel_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_acq_rel_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_acq_rel_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_acq_rel_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10585,31 +10585,31 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_seq_cst_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_seq_cst_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_seq_cst_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_seq_cst_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_seq_cst_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10758,29 +10758,29 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_monotonic_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_monotonic_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_monotonic_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_monotonic_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_monotonic_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10925,29 +10925,29 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_acquire_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_acquire_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_acquire_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_acquire_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_acquire_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11092,29 +11092,29 @@ define amdgpu_kernel void @local_wavefront_one_as_release_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_release_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_release_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_release_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_release_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_release_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11259,29 +11259,29 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_acq_rel_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_acq_rel_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_acq_rel_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_acq_rel_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_acq_rel_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11426,29 +11426,29 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_seq_cst_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_seq_cst_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_seq_cst_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_seq_cst_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_seq_cst_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11593,29 +11593,29 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_monotonic_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_monotonic_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_monotonic_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_monotonic_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_monotonic_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11760,29 +11760,29 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_acquire_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_acquire_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_acquire_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_acquire_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_acquire_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11927,29 +11927,29 @@ define amdgpu_kernel void @local_wavefront_one_as_release_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_release_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_release_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_release_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_release_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_release_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12094,29 +12094,29 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_acq_rel_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_acq_rel_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_acq_rel_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_acq_rel_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_acq_rel_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12261,29 +12261,29 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_seq_cst_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_seq_cst_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_seq_cst_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_seq_cst_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_seq_cst_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12428,29 +12428,29 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_monotonic_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_monotonic_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_monotonic_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_monotonic_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_monotonic_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12595,29 +12595,29 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_acquire_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_acquire_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_acquire_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_acquire_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_acquire_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12762,29 +12762,29 @@ define amdgpu_kernel void @local_wavefront_one_as_release_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_release_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_release_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_release_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_release_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_release_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12929,29 +12929,29 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_acq_rel_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_acq_rel_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_acq_rel_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_acq_rel_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_acq_rel_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13096,29 +13096,29 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_seq_cst_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_seq_cst_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_seq_cst_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_seq_cst_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_seq_cst_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13287,35 +13287,35 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_monotonic_ret_cmpxch ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_monotonic_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_monotonic_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_monotonic_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_monotonic_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_monotonic_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13498,35 +13498,35 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_acquire_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_acquire_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_acquire_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_acquire_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_acquire_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13709,35 +13709,35 @@ define amdgpu_kernel void @local_wavefront_one_as_release_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_release_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_release_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_release_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_release_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_release_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13920,35 +13920,35 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14131,35 +14131,35 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14342,35 +14342,35 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_monotonic_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_monotonic_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_monotonic_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_monotonic_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_monotonic_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14553,35 +14553,35 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_acquire_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_acquire_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_acquire_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_acquire_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_acquire_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14764,35 +14764,35 @@ define amdgpu_kernel void @local_wavefront_one_as_release_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_release_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_release_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_release_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_release_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_release_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14975,35 +14975,35 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_acq_rel_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_acq_rel_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_acq_rel_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_acq_rel_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_acq_rel_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15186,35 +15186,35 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_seq_cst_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_seq_cst_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_seq_cst_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_seq_cst_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_seq_cst_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15397,35 +15397,35 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15608,35 +15608,35 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_acquire_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_acquire_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_acquire_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_acquire_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_acquire_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15819,35 +15819,35 @@ define amdgpu_kernel void @local_wavefront_one_as_release_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_release_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_release_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_release_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_release_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_release_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16030,35 +16030,35 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16241,35 +16241,35 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll index 0c79e0bfca9df..62d7f4801baf8 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll @@ -6,8 +6,8 @@ ; RUN: llc -mtriple=amdgcn-amd-amdpal -O0 -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A-NOTTGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX942-NOTTGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX942-TGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s @@ -106,29 +106,29 @@ define amdgpu_kernel void @local_workgroup_unordered_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_unordered_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_unordered_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_unordered_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_unordered_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_unordered_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -277,29 +277,29 @@ define amdgpu_kernel void @local_workgroup_monotonic_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_monotonic_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_monotonic_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_monotonic_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_monotonic_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_monotonic_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -450,30 +450,30 @@ define amdgpu_kernel void @local_workgroup_acquire_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_acquire_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_acquire_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_acquire_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_acquire_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_acquire_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -634,32 +634,32 @@ define amdgpu_kernel void @local_workgroup_seq_cst_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_seq_cst_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_seq_cst_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_seq_cst_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_seq_cst_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_seq_cst_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -800,25 +800,25 @@ define amdgpu_kernel void @local_workgroup_unordered_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_unordered_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_unordered_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_unordered_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_unordered_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_unordered_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -940,25 +940,25 @@ define amdgpu_kernel void @local_workgroup_monotonic_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_monotonic_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_monotonic_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_monotonic_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_monotonic_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_monotonic_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1088,27 +1088,27 @@ define amdgpu_kernel void @local_workgroup_release_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_release_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_release_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_release_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_release_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_release_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1246,27 +1246,27 @@ define amdgpu_kernel void @local_workgroup_seq_cst_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_seq_cst_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_seq_cst_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_seq_cst_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_seq_cst_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_seq_cst_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1396,25 +1396,25 @@ define amdgpu_kernel void @local_workgroup_monotonic_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_monotonic_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_monotonic_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_monotonic_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_monotonic_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_monotonic_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1544,27 +1544,27 @@ define amdgpu_kernel void @local_workgroup_acquire_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_acquire_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_acquire_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_acquire_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_acquire_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_acquire_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1700,27 +1700,27 @@ define amdgpu_kernel void @local_workgroup_release_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_release_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_release_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_release_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_release_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_release_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -1866,29 +1866,29 @@ define amdgpu_kernel void @local_workgroup_acq_rel_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_acq_rel_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_acq_rel_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_acq_rel_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_acq_rel_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_acq_rel_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2040,29 +2040,29 @@ define amdgpu_kernel void @local_workgroup_seq_cst_atomicrmw( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_seq_cst_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_seq_cst_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_seq_cst_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_seq_cst_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_seq_cst_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2224,32 +2224,32 @@ define amdgpu_kernel void @local_workgroup_acquire_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_acquire_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_acquire_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_acquire_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_acquire_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_acquire_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2420,34 +2420,34 @@ define amdgpu_kernel void @local_workgroup_acq_rel_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_acq_rel_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_acq_rel_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_acq_rel_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_acq_rel_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_acq_rel_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2626,34 +2626,34 @@ define amdgpu_kernel void @local_workgroup_seq_cst_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_seq_cst_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_seq_cst_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_seq_cst_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_seq_cst_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_seq_cst_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2812,29 +2812,29 @@ define amdgpu_kernel void @local_workgroup_monotonic_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_monotonic_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_monotonic_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_monotonic_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_monotonic_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_monotonic_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -2987,31 +2987,31 @@ define amdgpu_kernel void @local_workgroup_acquire_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_acquire_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_acquire_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_acquire_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_acquire_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_acquire_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3170,31 +3170,31 @@ define amdgpu_kernel void @local_workgroup_release_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_release_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_release_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_release_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_release_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_release_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3363,33 +3363,33 @@ define amdgpu_kernel void @local_workgroup_acq_rel_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_acq_rel_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_acq_rel_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_acq_rel_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_acq_rel_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_acq_rel_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3564,33 +3564,33 @@ define amdgpu_kernel void @local_workgroup_seq_cst_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_seq_cst_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_seq_cst_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_seq_cst_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_seq_cst_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_seq_cst_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3757,31 +3757,31 @@ define amdgpu_kernel void @local_workgroup_monotonic_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_monotonic_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_monotonic_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_monotonic_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_monotonic_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_monotonic_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -3940,31 +3940,31 @@ define amdgpu_kernel void @local_workgroup_acquire_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_acquire_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_acquire_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_acquire_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_acquire_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_acquire_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4131,33 +4131,33 @@ define amdgpu_kernel void @local_workgroup_release_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_release_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_release_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_release_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_release_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_release_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4332,33 +4332,33 @@ define amdgpu_kernel void @local_workgroup_acq_rel_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_acq_rel_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_acq_rel_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_acq_rel_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_acq_rel_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_acq_rel_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4533,33 +4533,33 @@ define amdgpu_kernel void @local_workgroup_seq_cst_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_seq_cst_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_seq_cst_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_seq_cst_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_seq_cst_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_seq_cst_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4734,33 +4734,33 @@ define amdgpu_kernel void @local_workgroup_monotonic_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_monotonic_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_monotonic_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_monotonic_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_monotonic_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_monotonic_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -4935,33 +4935,33 @@ define amdgpu_kernel void @local_workgroup_acquire_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_acquire_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_acquire_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_acquire_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_acquire_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_acquire_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5136,33 +5136,33 @@ define amdgpu_kernel void @local_workgroup_release_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_release_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_release_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_release_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_release_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_release_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5337,33 +5337,33 @@ define amdgpu_kernel void @local_workgroup_acq_rel_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_acq_rel_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_acq_rel_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_acq_rel_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_acq_rel_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_acq_rel_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5538,33 +5538,33 @@ define amdgpu_kernel void @local_workgroup_seq_cst_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_seq_cst_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_seq_cst_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_seq_cst_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_seq_cst_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_seq_cst_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5747,35 +5747,35 @@ define amdgpu_kernel void @local_workgroup_monotonic_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_monotonic_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_monotonic_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_monotonic_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_monotonic_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_monotonic_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -5960,36 +5960,36 @@ define amdgpu_kernel void @local_workgroup_acquire_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_acquire_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_acquire_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_acquire_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_acquire_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_acquire_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6182,37 +6182,37 @@ define amdgpu_kernel void @local_workgroup_release_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_release_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_release_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_release_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_release_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_release_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6413,38 +6413,38 @@ define amdgpu_kernel void @local_workgroup_acq_rel_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_acq_rel_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_acq_rel_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_acq_rel_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_acq_rel_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_acq_rel_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6647,38 +6647,38 @@ define amdgpu_kernel void @local_workgroup_seq_cst_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_seq_cst_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_seq_cst_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_seq_cst_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_seq_cst_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_seq_cst_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -6873,36 +6873,36 @@ define amdgpu_kernel void @local_workgroup_monotonic_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_monotonic_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_monotonic_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_monotonic_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_monotonic_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_monotonic_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7089,36 +7089,36 @@ define amdgpu_kernel void @local_workgroup_acquire_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_acquire_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_acquire_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_acquire_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_acquire_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_acquire_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7313,38 +7313,38 @@ define amdgpu_kernel void @local_workgroup_release_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_release_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_release_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_release_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_release_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_release_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7547,38 +7547,38 @@ define amdgpu_kernel void @local_workgroup_acq_rel_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_acq_rel_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_acq_rel_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_acq_rel_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_acq_rel_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_acq_rel_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -7781,38 +7781,38 @@ define amdgpu_kernel void @local_workgroup_seq_cst_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_seq_cst_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_seq_cst_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_seq_cst_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_seq_cst_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_seq_cst_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8015,38 +8015,38 @@ define amdgpu_kernel void @local_workgroup_monotonic_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_monotonic_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_monotonic_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_monotonic_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_monotonic_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_monotonic_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8249,38 +8249,38 @@ define amdgpu_kernel void @local_workgroup_acquire_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_acquire_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_acquire_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_acquire_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_acquire_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_acquire_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8483,38 +8483,38 @@ define amdgpu_kernel void @local_workgroup_release_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_release_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_release_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_release_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_release_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_release_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8717,38 +8717,38 @@ define amdgpu_kernel void @local_workgroup_acq_rel_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_acq_rel_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -8951,38 +8951,38 @@ define amdgpu_kernel void @local_workgroup_seq_cst_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: buffer_inv sc0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: buffer_inv sc0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_seq_cst_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9155,29 +9155,29 @@ define amdgpu_kernel void @local_workgroup_one_as_unordered_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_unordered_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_unordered_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_unordered_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_unordered_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_unordered_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9326,29 +9326,29 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_monotonic_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_monotonic_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_monotonic_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_monotonic_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_monotonic_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9497,29 +9497,29 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_acquire_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_acquire_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_acquire_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_acquire_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_acquire_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9668,29 +9668,29 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_load( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_seq_cst_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_seq_cst_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: ds_read_b32 v1, v0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_seq_cst_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_seq_cst_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: ds_read_b32 v1, v0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_seq_cst_load: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9821,25 +9821,25 @@ define amdgpu_kernel void @local_workgroup_one_as_unordered_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_unordered_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_unordered_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_unordered_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_unordered_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_unordered_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -9961,25 +9961,25 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_monotonic_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_monotonic_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_monotonic_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_monotonic_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_monotonic_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10101,25 +10101,25 @@ define amdgpu_kernel void @local_workgroup_one_as_release_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_release_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_release_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_release_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_release_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_release_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10241,25 +10241,25 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_store( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_seq_cst_store: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_seq_cst_store: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_seq_cst_store: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_seq_cst_store: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_seq_cst_store: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10381,25 +10381,25 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_monotonic_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_monotonic_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_monotonic_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_monotonic_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_monotonic_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10521,25 +10521,25 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_acquire_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_acquire_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_acquire_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_acquire_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_acquire_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10661,25 +10661,25 @@ define amdgpu_kernel void @local_workgroup_one_as_release_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_release_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_release_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_release_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_release_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_release_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10801,25 +10801,25 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_acq_rel_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_acq_rel_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_acq_rel_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_acq_rel_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_acq_rel_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -10941,25 +10941,25 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_seq_cst_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_seq_cst_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_seq_cst_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_seq_cst_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_seq_cst_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11105,31 +11105,31 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_acquire_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_acquire_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_acquire_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_acquire_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_acquire_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11288,31 +11288,31 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_acq_rel_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_acq_rel_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_acq_rel_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_acq_rel_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_acq_rel_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11471,31 +11471,31 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_seq_cst_ret_atomicrmw: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_seq_cst_ret_atomicrmw: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_seq_cst_ret_atomicrmw: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_seq_cst_ret_atomicrmw: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_seq_cst_ret_atomicrmw: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11644,29 +11644,29 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_monotonic_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_monotonic_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_monotonic_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_monotonic_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_monotonic_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11811,29 +11811,29 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_acquire_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_acquire_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_acquire_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_acquire_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_acquire_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -11978,29 +11978,29 @@ define amdgpu_kernel void @local_workgroup_one_as_release_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_release_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_release_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_release_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_release_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_release_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12145,29 +12145,29 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_acq_rel_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_acq_rel_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_acq_rel_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_acq_rel_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_acq_rel_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12312,29 +12312,29 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_seq_cst_monotonic_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_seq_cst_monotonic_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_seq_cst_monotonic_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_seq_cst_monotonic_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_seq_cst_monotonic_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12479,29 +12479,29 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_monotonic_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_monotonic_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_monotonic_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_monotonic_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_monotonic_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12646,29 +12646,29 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_acquire_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_acquire_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_acquire_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_acquire_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_acquire_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12813,29 +12813,29 @@ define amdgpu_kernel void @local_workgroup_one_as_release_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_release_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_release_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_release_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_release_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_release_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -12980,29 +12980,29 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_acq_rel_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_acq_rel_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_acq_rel_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_acq_rel_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_acq_rel_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13147,29 +13147,29 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_seq_cst_acquire_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_seq_cst_acquire_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_seq_cst_acquire_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_seq_cst_acquire_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_seq_cst_acquire_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13314,29 +13314,29 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_monotonic_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_monotonic_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_monotonic_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_monotonic_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_monotonic_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13481,29 +13481,29 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_acquire_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_acquire_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_acquire_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_acquire_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_acquire_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13648,29 +13648,29 @@ define amdgpu_kernel void @local_workgroup_one_as_release_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_release_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_release_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_release_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_release_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_release_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13815,29 +13815,29 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_acq_rel_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_acq_rel_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_acq_rel_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_acq_rel_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_acq_rel_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -13982,29 +13982,29 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_seq_cst_seq_cst_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_seq_cst_seq_cst_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_seq_cst_seq_cst_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_seq_cst_seq_cst_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-TGSPLIT-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_seq_cst_seq_cst_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14173,35 +14173,35 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_monotonic_ret_cmpxch ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_monotonic_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_monotonic_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_monotonic_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_monotonic_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_monotonic_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14384,35 +14384,35 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_acquire_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_acquire_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_acquire_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_acquire_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_acquire_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14595,35 +14595,35 @@ define amdgpu_kernel void @local_workgroup_one_as_release_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_release_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_release_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_release_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_release_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_release_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -14806,35 +14806,35 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15017,35 +15017,35 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15228,35 +15228,35 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_monotonic_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_monotonic_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_monotonic_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_monotonic_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_monotonic_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15439,35 +15439,35 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_acquire_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_acquire_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_acquire_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_acquire_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_acquire_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15650,35 +15650,35 @@ define amdgpu_kernel void @local_workgroup_one_as_release_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_release_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_release_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_release_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_release_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_release_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -15861,35 +15861,35 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_acq_rel_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_acq_rel_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_acq_rel_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_acq_rel_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_acq_rel_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16072,35 +16072,35 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_seq_cst_acquire_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_seq_cst_acquire_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_seq_cst_acquire_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_seq_cst_acquire_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_seq_cst_acquire_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16283,35 +16283,35 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16494,35 +16494,35 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_acquire_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_acquire_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_acquire_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_acquire_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_acquire_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16705,35 +16705,35 @@ define amdgpu_kernel void @local_workgroup_one_as_release_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_release_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_release_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_release_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_release_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_release_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -16916,35 +16916,35 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry @@ -17127,35 +17127,35 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: ds_write_b32 v0, v1 ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: local_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm -; -; GFX940-TGSPLIT-LABEL: local_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: local_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-NOTTGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-NOTTGSPLIT-NEXT: s_endpgm +; +; GFX942-TGSPLIT-LABEL: local_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[4:5], 0x4 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s1 +; GFX942-TGSPLIT-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: ds_write_b32 v0, v1 +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: local_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg: ; GFX11-WGP: ; %bb.0: ; %entry diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll index 6feab49ed86b6..fceee413f3f97 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll @@ -6,8 +6,8 @@ ; RUN: llc -mtriple=amdgcn-amd-amdpal -O0 -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A-NOTTGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX942-NOTTGSPLIT %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx942 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX942-TGSPLIT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s @@ -133,27 +133,27 @@ define amdgpu_kernel void @private_nontemporal_load_0( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: private_nontemporal_load_0: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: scratch_load_dword v1, off, s2 nt -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: private_nontemporal_load_0: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: scratch_load_dword v1, off, s2 nt +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: private_nontemporal_load_0: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: scratch_load_dword v1, off, s2 nt -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: private_nontemporal_load_0: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: scratch_load_dword v1, off, s2 nt +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: private_nontemporal_load_0: ; GFX11-WGP: ; %bb.0: ; %entry @@ -345,39 +345,39 @@ define amdgpu_kernel void @private_nontemporal_load_1( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: private_nontemporal_load_1: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_mov_b32 s2, 0x3ff -; GFX940-NOTTGSPLIT-NEXT: v_and_b32_e64 v1, v1, s2 -; GFX940-NOTTGSPLIT-NEXT: s_mov_b32 s2, 2 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-NOTTGSPLIT-NEXT: v_lshl_add_u32 v1, v1, s2, v2 -; GFX940-NOTTGSPLIT-NEXT: scratch_load_dword v1, v1, off nt -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: private_nontemporal_load_1: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_mov_b32 s2, 0x3ff +; GFX942-NOTTGSPLIT-NEXT: v_and_b32_e64 v1, v1, s2 +; GFX942-NOTTGSPLIT-NEXT: s_mov_b32 s2, 2 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-NOTTGSPLIT-NEXT: v_lshl_add_u32 v1, v1, s2, v2 +; GFX942-NOTTGSPLIT-NEXT: scratch_load_dword v1, v1, off nt +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: private_nontemporal_load_1: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_mov_b32 s2, 0x3ff -; GFX940-TGSPLIT-NEXT: v_and_b32_e64 v1, v1, s2 -; GFX940-TGSPLIT-NEXT: s_mov_b32 s2, 2 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 -; GFX940-TGSPLIT-NEXT: v_lshl_add_u32 v1, v1, s2, v2 -; GFX940-TGSPLIT-NEXT: scratch_load_dword v1, v1, off nt -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: private_nontemporal_load_1: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-TGSPLIT-NEXT: s_load_dword s3, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_mov_b32 s2, 0x3ff +; GFX942-TGSPLIT-NEXT: v_and_b32_e64 v1, v1, s2 +; GFX942-TGSPLIT-NEXT: s_mov_b32 s2, 2 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v2, s3 +; GFX942-TGSPLIT-NEXT: v_lshl_add_u32 v1, v1, s2, v2 +; GFX942-TGSPLIT-NEXT: scratch_load_dword v1, v1, off nt +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: private_nontemporal_load_1: ; GFX11-WGP: ; %bb.0: ; %entry @@ -556,27 +556,27 @@ define amdgpu_kernel void @private_nontemporal_store_0( ; GFX90A-TGSPLIT-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen glc slc ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: private_nontemporal_store_0: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s1, s[2:3], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NOTTGSPLIT-NEXT: scratch_store_dword off, v0, s0 sc0 nt sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: private_nontemporal_store_0: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s1, s[2:3], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NOTTGSPLIT-NEXT: scratch_store_dword off, v0, s0 nt +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: private_nontemporal_store_0: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: s_load_dword s1, s[2:3], 0x0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-TGSPLIT-NEXT: scratch_store_dword off, v0, s0 sc0 nt sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: private_nontemporal_store_0: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: s_load_dword s1, s[2:3], 0x0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-TGSPLIT-NEXT: scratch_store_dword off, v0, s0 nt +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: private_nontemporal_store_0: ; GFX11-WGP: ; %bb.0: ; %entry @@ -747,37 +747,37 @@ define amdgpu_kernel void @private_nontemporal_store_1( ; GFX90A-TGSPLIT-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen glc slc ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: private_nontemporal_store_1: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[0:1], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_mov_b32 s1, 0x3ff -; GFX940-NOTTGSPLIT-NEXT: v_and_b32_e64 v0, v0, s1 -; GFX940-NOTTGSPLIT-NEXT: s_mov_b32 s1, 2 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-NOTTGSPLIT-NEXT: v_lshl_add_u32 v1, v0, s1, v1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: scratch_store_dword v1, v0, off sc0 nt sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: private_nontemporal_store_1: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s0, s[0:1], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_mov_b32 s1, 0x3ff +; GFX942-NOTTGSPLIT-NEXT: v_and_b32_e64 v0, v0, s1 +; GFX942-NOTTGSPLIT-NEXT: s_mov_b32 s1, 2 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-NOTTGSPLIT-NEXT: v_lshl_add_u32 v1, v0, s1, v1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NOTTGSPLIT-NEXT: scratch_store_dword v1, v0, off nt +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: private_nontemporal_store_1: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[0:1], 0x0 -; GFX940-TGSPLIT-NEXT: s_mov_b32 s1, 0x3ff -; GFX940-TGSPLIT-NEXT: v_and_b32_e64 v0, v0, s1 -; GFX940-TGSPLIT-NEXT: s_mov_b32 s1, 2 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 -; GFX940-TGSPLIT-NEXT: v_lshl_add_u32 v1, v0, s1, v1 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: scratch_store_dword v1, v0, off sc0 nt sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: private_nontemporal_store_1: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: s_load_dword s0, s[0:1], 0x0 +; GFX942-TGSPLIT-NEXT: s_mov_b32 s1, 0x3ff +; GFX942-TGSPLIT-NEXT: v_and_b32_e64 v0, v0, s1 +; GFX942-TGSPLIT-NEXT: s_mov_b32 s1, 2 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX942-TGSPLIT-NEXT: v_lshl_add_u32 v1, v0, s1, v1 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-TGSPLIT-NEXT: scratch_store_dword v1, v0, off nt +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: private_nontemporal_store_1: ; GFX11-WGP: ; %bb.0: ; %entry @@ -969,27 +969,27 @@ define amdgpu_kernel void @private_nontemporal_volatile_load( ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[4:5] ; GFX90A-TGSPLIT-NEXT: s_endpgm ; -; GFX940-NOTTGSPLIT-LABEL: private_nontemporal_volatile_load: -; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry -; GFX940-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: scratch_load_dword v1, off, s2 sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NOTTGSPLIT-NEXT: s_endpgm +; GFX942-NOTTGSPLIT-LABEL: private_nontemporal_volatile_load: +; GFX942-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX942-NOTTGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-NOTTGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: scratch_load_dword v1, off, s2 sc0 sc1 +; GFX942-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NOTTGSPLIT-NEXT: s_endpgm ; -; GFX940-TGSPLIT-LABEL: private_nontemporal_volatile_load: -; GFX940-TGSPLIT: ; %bb.0: ; %entry -; GFX940-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 -; GFX940-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: scratch_load_dword v1, off, s2 sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-TGSPLIT-NEXT: s_endpgm +; GFX942-TGSPLIT-LABEL: private_nontemporal_volatile_load: +; GFX942-TGSPLIT: ; %bb.0: ; %entry +; GFX942-TGSPLIT-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX942-TGSPLIT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; GFX942-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-TGSPLIT-NEXT: scratch_load_dword v1, off, s2 sc0 sc1 +; GFX942-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX942-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: private_nontemporal_volatile_load: ; GFX11-WGP: ; %bb.0: ; %entry diff --git a/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll b/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll index e0708a55f438b..322686b0144a0 100644 --- a/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll +++ b/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll @@ -1,8 +1,8 @@ ; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck --enable-var-scope --check-prefixes=GCN,GFX908 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck --enable-var-scope --check-prefixes=GCN,GFX90A %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck --enable-var-scope --check-prefixes=GCN,GFX90A %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck --enable-var-scope --check-prefixes=GCN,GFX90A %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck --enable-var-scope --check-prefixes=GCN,GFX90A %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck --enable-var-scope --check-prefixes=GCN,GFX90A %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck --enable-var-scope --check-prefixes=GCN,GFX90A %s declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32) diff --git a/llvm/test/CodeGen/AMDGPU/mfma-loop.ll b/llvm/test/CodeGen/AMDGPU/mfma-loop.ll index d5aeff7e819dd..9d329a2d121ed 100644 --- a/llvm/test/CodeGen/AMDGPU/mfma-loop.ll +++ b/llvm/test/CodeGen/AMDGPU/mfma-loop.ll @@ -1,6 +1,6 @@ ; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX908,GFX908_A %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX90A,GFX908_A,GFX940_A %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX940,GFX940_A %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX90A,GFX908_A,GFX942_A %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX942,GFX942_A %s ; GCN-LABEL: {{^}}test_mfma_loop_zeroinit: @@ -11,7 +11,7 @@ ; GCN: [[LOOP:.LBB[0-9_]+]]: ; GCN-NOT: v_accvgpr ; GFX908_A: v_mfma_f32_32x32x1f32 -; GFX940: v_mfma_f32_32x32x1_2b_f32 +; GFX942: v_mfma_f32_32x32x1_2b_f32 ; GCN-NOT: v_accvgpr ; GCN: s_cbranch_scc1 [[LOOP]] @@ -51,7 +51,7 @@ exit: ; GCN: [[LOOP:.LBB[0-9_]+]]: ; GCN-NOT: v_accvgpr ; GFX908_A: v_mfma_f32_32x32x1f32 -; GFX940: v_mfma_f32_32x32x1_2b_f32 +; GFX942: v_mfma_f32_32x32x1_2b_f32 ; GCN-NOT: v_accvgpr ; GCN: s_cbranch_scc1 [[LOOP]] @@ -85,7 +85,7 @@ exit: ; GCN: [[LOOP:.LBB[0-9_]+]]: ; GCN-NOT: v_accvgpr ; GFX908_A: v_mfma_f32_32x32x1f32 -; GFX940: v_mfma_f32_32x32x1_2b_f32 +; GFX942: v_mfma_f32_32x32x1_2b_f32 ; GCN-NOT: v_accvgpr ; GCN: s_cbranch_scc1 [[LOOP]] @@ -213,13 +213,13 @@ exit: ; FIXME: Constant is now in VGPR instead of SGPR. -; GFX940_A: v_mov_b32_e32 v{{[0-9]+}}, 0x4{{[0-9a-f]+}} -; GFX940_A-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GFX942_A: v_mov_b32_e32 v{{[0-9]+}}, 0x4{{[0-9a-f]+}} +; GFX942_A-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} ; GCN: [[LOOP:.LBB[0-9_]+]]: ; GCN-NOT: v_accvgpr ; GFX908_A: v_mfma_f32_32x32x1f32 -; GFX940: v_mfma_f32_32x32x1_2b_f32 +; GFX942: v_mfma_f32_32x32x1_2b_f32 ; GCN-NOT: v_accvgpr ; GCN: s_cbranch_scc1 [[LOOP]] @@ -252,7 +252,7 @@ exit: ; GCN: [[LOOP:.LBB[0-9_]+]]: ; GCN-NOT: v_accvgpr ; GFX908_A: v_mfma_f32_32x32x1f32 -; GFX940: v_mfma_f32_32x32x1_2b_f32 +; GFX942: v_mfma_f32_32x32x1_2b_f32 ; GCN-NOT: v_accvgpr ; GCN: s_cbranch_scc1 [[LOOP]] @@ -317,12 +317,12 @@ exit: ; GFX908: v_mov_b32_e32 [[TMP:v[0-9]+]], s{{[0-9]+}} ; GFX908-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] -; GFX940_A-COUNT-32: v_accvgpr_write_b32 [[LEAD:a[0-9]+]], s{{[0-9]+}} +; GFX942_A-COUNT-32: v_accvgpr_write_b32 [[LEAD:a[0-9]+]], s{{[0-9]+}} ; GCN: [[LOOP:.LBB[0-9_]+]]: ; GCN-NOT: v_accvgpr ; GFX908_A: v_mfma_f32_32x32x1f32 -; GFX940: v_mfma_f32_32x32x1_2b_f32 +; GFX942: v_mfma_f32_32x32x1_2b_f32 ; GCN-NOT: v_accvgpr ; GCN: s_cbranch_scc1 [[LOOP]] @@ -385,7 +385,7 @@ exit: ; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v0 ; GFX908-DAG: v_mov_b32_e32 [[TMP:v[0-9]+]], s{{[0-9]+}} -; GFX940_A-DAG: s_load_dword [[TMP:s[0-9]+]], +; GFX942_A-DAG: s_load_dword [[TMP:s[0-9]+]], ; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]] ; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} ; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}} @@ -424,7 +424,7 @@ exit: ; GCN: [[LOOP:.LBB[0-9_]+]]: ; GCN-NOT: v_accvgpr ; GFX908_A: v_mfma_f32_32x32x1f32 -; GFX940: v_mfma_f32_32x32x1_2b_f32 +; GFX942: v_mfma_f32_32x32x1_2b_f32 ; GCN-NOT: v_accvgpr ; GCN: s_cbranch_scc1 [[LOOP]] @@ -462,13 +462,13 @@ exit: ; GFX90A-NOT: v_accvgpr ; GFX90A: v_mfma_f32_32x32x1f32 a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9]+}}, 0{{$}} ; GFX90A-NOT: v_accvgpr -; GFX940: v_mfma_f32_32x32x1_2b_f32 a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9]+}}, 0{{$}} +; GFX942: v_mfma_f32_32x32x1_2b_f32 a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9]+}}, 0{{$}} ; GCN-NOT: v_accvgpr ; GCN: [[LOOP:.LBB[0-9_]+]]: ; GCN-NOT: v_accvgpr ; GFX908_A: v_mfma_f32_32x32x1f32 -; GFX940: v_mfma_f32_32x32x1_2b_f32 +; GFX942: v_mfma_f32_32x32x1_2b_f32 ; GCN-NOT: v_accvgpr ; GCN: s_cbranch_scc1 [[LOOP]] @@ -503,17 +503,17 @@ exit: ; GFX90A-NOT: v_accvgpr ; GFX90A: v_mfma_f32_32x32x1f32 a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9]+}}, 0{{$}} ; GFX90A-NOT: v_accvgpr -; GFX940: v_mfma_f32_32x32x1_2b_f32 a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9]+}}, 0{{$}} +; GFX942: v_mfma_f32_32x32x1_2b_f32 a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9]+}}, 0{{$}} ; Check that we are using only one tmp VGPR. ; GFX908: v_accvgpr_read_b32 [[TMP:v[0-9]+]], a{{[0-9]+}} -; GFX940_A-COUNT-31: v_accvgpr_mov_b32 a{{[0-9]+}}, a{{[0-9]+}} +; GFX942_A-COUNT-31: v_accvgpr_mov_b32 a{{[0-9]+}}, a{{[0-9]+}} ; GCN: [[LOOP:.LBB[0-9_]+]]: ; GCN-NOT: v_accvgpr ; GFX908_A: v_mfma_f32_32x32x1f32 -; GFX940: v_mfma_f32_32x32x1_2b_f32 +; GFX942: v_mfma_f32_32x32x1_2b_f32 ; GCN-NOT: v_accvgpr ; GCN: s_cbranch_scc1 [[LOOP]] @@ -587,7 +587,7 @@ exit: ; GCN: [[INNER_LOOP:.LBB[0-9_]+]]: ; GCN-NOT: v_accvgpr ; GFX908_A: v_mfma_f32_32x32x1f32 -; GFX940: v_mfma_f32_32x32x1_2b_f32 +; GFX942: v_mfma_f32_32x32x1_2b_f32 ; GCN-NOT: v_accvgpr ; GCN: s_cbranch_scc1 [[INNER_LOOP]] ; GCN-NOT: v_accvgpr diff --git a/llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll b/llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll index 8dbbab3c57f72..54b535ca43126 100644 --- a/llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll +++ b/llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll @@ -1,7 +1,7 @@ ; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GREEDY,GREEDY908 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GREEDY,GREEDY90A %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -early-live-intervals -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GREEDY,GREEDY90A %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GREEDY,GREEDY90A %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GREEDY,GREEDY90A %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GREEDY,GREEDY90A-GISEL %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -sgpr-regalloc=fast -vgpr-regalloc=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,FAST %s diff --git a/llvm/test/CodeGen/AMDGPU/mfma-vgpr-cd-select-gfx940.ll b/llvm/test/CodeGen/AMDGPU/mfma-vgpr-cd-select-gfx942.ll similarity index 99% rename from llvm/test/CodeGen/AMDGPU/mfma-vgpr-cd-select-gfx940.ll rename to llvm/test/CodeGen/AMDGPU/mfma-vgpr-cd-select-gfx942.ll index b48152dad99ac..6b7f33b7bc773 100644 --- a/llvm/test/CodeGen/AMDGPU/mfma-vgpr-cd-select-gfx940.ll +++ b/llvm/test/CodeGen/AMDGPU/mfma-vgpr-cd-select-gfx942.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GCN %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GCN %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GCN %s declare <4 x i32> @llvm.amdgcn.mfma.i32.16x16x32.i8(i64, i64, <4 x i32>, i32, i32, i32) declare <16 x i32> @llvm.amdgcn.mfma.i32.32x32x16.i8(i64, i64, <16 x i32>, i32, i32, i32) diff --git a/llvm/test/CodeGen/AMDGPU/mfma-vgpr-cd-select.ll b/llvm/test/CodeGen/AMDGPU/mfma-vgpr-cd-select.ll index bffd15872c42c..ccf97dd8c7f82 100644 --- a/llvm/test/CodeGen/AMDGPU/mfma-vgpr-cd-select.ll +++ b/llvm/test/CodeGen/AMDGPU/mfma-vgpr-cd-select.ll @@ -1,7 +1,7 @@ ; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck --enable-var-scope --check-prefixes=GCN %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck --enable-var-scope --check-prefixes=GCN %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck --enable-var-scope --check-prefixes=GCN %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck --enable-var-scope --check-prefixes=GCN %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck --enable-var-scope --check-prefixes=GCN %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck --enable-var-scope --check-prefixes=GCN %s declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32) declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float, float, <16 x float>, i32, i32, i32) diff --git a/llvm/test/CodeGen/AMDGPU/neighboring-mfma-padding.mir b/llvm/test/CodeGen/AMDGPU/neighboring-mfma-padding.mir index bf2cf6aeb990d..df3dd7292b7f8 100644 --- a/llvm/test/CodeGen/AMDGPU/neighboring-mfma-padding.mir +++ b/llvm/test/CodeGen/AMDGPU/neighboring-mfma-padding.mir @@ -9,9 +9,9 @@ # RUN: llc -mtriple=amdgcn -mcpu=gfx90a -amdgpu-mfma-padding-ratio=50 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=gfx90a-PAD50 %s # RUN: llc -mtriple=amdgcn -mcpu=gfx90a -amdgpu-mfma-padding-ratio=100 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=gfx90a-PAD100 %s -# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=gfx940-DEFAULT %s -# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -amdgpu-mfma-padding-ratio=50 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=gfx940-PAD50 %s -# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -amdgpu-mfma-padding-ratio=100 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=gfx940-PAD100 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=gfx942-DEFAULT %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -amdgpu-mfma-padding-ratio=50 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=gfx942-PAD50 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -amdgpu-mfma-padding-ratio=100 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=gfx942-PAD100 %s --- name: mfma_padding_2_pass @@ -54,20 +54,20 @@ body: | ; gfx90a-PAD100-NEXT: S_NOP 1 ; gfx90a-PAD100-NEXT: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec ; - ; gfx940-DEFAULT-LABEL: name: mfma_padding_2_pass - ; gfx940-DEFAULT: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec - ; gfx940-DEFAULT-NEXT: S_NOP 1 - ; gfx940-DEFAULT-NEXT: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-DEFAULT-LABEL: name: mfma_padding_2_pass + ; gfx942-DEFAULT: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-DEFAULT-NEXT: S_NOP 1 + ; gfx942-DEFAULT-NEXT: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec ; - ; gfx940-PAD50-LABEL: name: mfma_padding_2_pass - ; gfx940-PAD50: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec - ; gfx940-PAD50-NEXT: S_NOP 1 - ; gfx940-PAD50-NEXT: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-PAD50-LABEL: name: mfma_padding_2_pass + ; gfx942-PAD50: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-PAD50-NEXT: S_NOP 1 + ; gfx942-PAD50-NEXT: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec ; - ; gfx940-PAD100-LABEL: name: mfma_padding_2_pass - ; gfx940-PAD100: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec - ; gfx940-PAD100-NEXT: S_NOP 1 - ; gfx940-PAD100-NEXT: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-PAD100-LABEL: name: mfma_padding_2_pass + ; gfx942-PAD100: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-PAD100-NEXT: S_NOP 1 + ; gfx942-PAD100-NEXT: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec ... @@ -118,23 +118,23 @@ body: | ; gfx90a-PAD100-NEXT: S_NOP 0 ; gfx90a-PAD100-NEXT: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec ; - ; gfx940-DEFAULT-LABEL: name: mfma_padding_2_pass_1_intervening_valu - ; gfx940-DEFAULT: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec - ; gfx940-DEFAULT-NEXT: $vgpr2 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-DEFAULT-NEXT: S_NOP 0 - ; gfx940-DEFAULT-NEXT: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec - ; - ; gfx940-PAD50-LABEL: name: mfma_padding_2_pass_1_intervening_valu - ; gfx940-PAD50: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec - ; gfx940-PAD50-NEXT: $vgpr2 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD50-NEXT: S_NOP 0 - ; gfx940-PAD50-NEXT: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec - ; - ; gfx940-PAD100-LABEL: name: mfma_padding_2_pass_1_intervening_valu - ; gfx940-PAD100: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec - ; gfx940-PAD100-NEXT: $vgpr2 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD100-NEXT: S_NOP 0 - ; gfx940-PAD100-NEXT: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-DEFAULT-LABEL: name: mfma_padding_2_pass_1_intervening_valu + ; gfx942-DEFAULT: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-DEFAULT-NEXT: $vgpr2 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-DEFAULT-NEXT: S_NOP 0 + ; gfx942-DEFAULT-NEXT: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec + ; + ; gfx942-PAD50-LABEL: name: mfma_padding_2_pass_1_intervening_valu + ; gfx942-PAD50: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-PAD50-NEXT: $vgpr2 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD50-NEXT: S_NOP 0 + ; gfx942-PAD50-NEXT: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec + ; + ; gfx942-PAD100-LABEL: name: mfma_padding_2_pass_1_intervening_valu + ; gfx942-PAD100: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-PAD100-NEXT: $vgpr2 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD100-NEXT: S_NOP 0 + ; gfx942-PAD100-NEXT: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec $vgpr2 = V_MOV_B32_e32 1, implicit $exec $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec @@ -189,23 +189,23 @@ body: | ; gfx90a-PAD100-NEXT: S_NOP 1 ; gfx90a-PAD100-NEXT: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec ; - ; gfx940-DEFAULT-LABEL: name: mfma_padding_2_pass_dbg - ; gfx940-DEFAULT: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec - ; gfx940-DEFAULT-NEXT: DBG_VALUE - ; gfx940-DEFAULT-NEXT: S_NOP 1 - ; gfx940-DEFAULT-NEXT: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec - ; - ; gfx940-PAD50-LABEL: name: mfma_padding_2_pass_dbg - ; gfx940-PAD50: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec - ; gfx940-PAD50-NEXT: DBG_VALUE - ; gfx940-PAD50-NEXT: S_NOP 1 - ; gfx940-PAD50-NEXT: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec - ; - ; gfx940-PAD100-LABEL: name: mfma_padding_2_pass_dbg - ; gfx940-PAD100: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec - ; gfx940-PAD100-NEXT: DBG_VALUE - ; gfx940-PAD100-NEXT: S_NOP 1 - ; gfx940-PAD100-NEXT: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-DEFAULT-LABEL: name: mfma_padding_2_pass_dbg + ; gfx942-DEFAULT: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-DEFAULT-NEXT: DBG_VALUE + ; gfx942-DEFAULT-NEXT: S_NOP 1 + ; gfx942-DEFAULT-NEXT: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec + ; + ; gfx942-PAD50-LABEL: name: mfma_padding_2_pass_dbg + ; gfx942-PAD50: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-PAD50-NEXT: DBG_VALUE + ; gfx942-PAD50-NEXT: S_NOP 1 + ; gfx942-PAD50-NEXT: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec + ; + ; gfx942-PAD100-LABEL: name: mfma_padding_2_pass_dbg + ; gfx942-PAD100: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-PAD100-NEXT: DBG_VALUE + ; gfx942-PAD100-NEXT: S_NOP 1 + ; gfx942-PAD100-NEXT: $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec DBG_VALUE $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec @@ -253,19 +253,19 @@ body: | ; gfx90a-PAD100-NEXT: S_NOP 7 ; gfx90a-PAD100-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec ; - ; gfx940-DEFAULT-LABEL: name: mfma_padding_8_pass - ; gfx940-DEFAULT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec - ; gfx940-DEFAULT-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-DEFAULT-LABEL: name: mfma_padding_8_pass + ; gfx942-DEFAULT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-DEFAULT-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec ; - ; gfx940-PAD50-LABEL: name: mfma_padding_8_pass - ; gfx940-PAD50: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec - ; gfx940-PAD50-NEXT: S_NOP 3 - ; gfx940-PAD50-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-PAD50-LABEL: name: mfma_padding_8_pass + ; gfx942-PAD50: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-PAD50-NEXT: S_NOP 3 + ; gfx942-PAD50-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec ; - ; gfx940-PAD100-LABEL: name: mfma_padding_8_pass - ; gfx940-PAD100: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec - ; gfx940-PAD100-NEXT: S_NOP 7 - ; gfx940-PAD100-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-PAD100-LABEL: name: mfma_padding_8_pass + ; gfx942-PAD100: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-PAD100-NEXT: S_NOP 7 + ; gfx942-PAD100-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec ... @@ -327,25 +327,25 @@ body: | ; gfx90a-PAD100-NEXT: S_NOP 5 ; gfx90a-PAD100-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec ; - ; gfx940-DEFAULT-LABEL: name: mfma_padding_8_pass_2_intervening_valu - ; gfx940-DEFAULT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec - ; gfx940-DEFAULT-NEXT: $vgpr2 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-DEFAULT-NEXT: $vgpr3 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-DEFAULT-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec - ; - ; gfx940-PAD50-LABEL: name: mfma_padding_8_pass_2_intervening_valu - ; gfx940-PAD50: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec - ; gfx940-PAD50-NEXT: $vgpr2 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD50-NEXT: $vgpr3 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD50-NEXT: S_NOP 1 - ; gfx940-PAD50-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec - ; - ; gfx940-PAD100-LABEL: name: mfma_padding_8_pass_2_intervening_valu - ; gfx940-PAD100: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec - ; gfx940-PAD100-NEXT: $vgpr2 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD100-NEXT: $vgpr3 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD100-NEXT: S_NOP 5 - ; gfx940-PAD100-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-DEFAULT-LABEL: name: mfma_padding_8_pass_2_intervening_valu + ; gfx942-DEFAULT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-DEFAULT-NEXT: $vgpr2 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-DEFAULT-NEXT: $vgpr3 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-DEFAULT-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; + ; gfx942-PAD50-LABEL: name: mfma_padding_8_pass_2_intervening_valu + ; gfx942-PAD50: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-PAD50-NEXT: $vgpr2 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD50-NEXT: $vgpr3 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD50-NEXT: S_NOP 1 + ; gfx942-PAD50-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; + ; gfx942-PAD100-LABEL: name: mfma_padding_8_pass_2_intervening_valu + ; gfx942-PAD100: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-PAD100-NEXT: $vgpr2 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD100-NEXT: $vgpr3 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD100-NEXT: S_NOP 5 + ; gfx942-PAD100-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec $vgpr2 = V_MOV_B32_e32 1, implicit $exec $vgpr3 = V_MOV_B32_e32 1, implicit $exec @@ -397,20 +397,20 @@ body: | ; gfx90a-PAD100-NEXT: S_NOP 7 ; gfx90a-PAD100-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec ; - ; gfx940-DEFAULT-LABEL: name: mfma_padding_16_pass - ; gfx940-DEFAULT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec - ; gfx940-DEFAULT-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-DEFAULT-LABEL: name: mfma_padding_16_pass + ; gfx942-DEFAULT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-DEFAULT-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec ; - ; gfx940-PAD50-LABEL: name: mfma_padding_16_pass - ; gfx940-PAD50: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec - ; gfx940-PAD50-NEXT: S_NOP 7 - ; gfx940-PAD50-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-PAD50-LABEL: name: mfma_padding_16_pass + ; gfx942-PAD50: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-PAD50-NEXT: S_NOP 7 + ; gfx942-PAD50-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec ; - ; gfx940-PAD100-LABEL: name: mfma_padding_16_pass - ; gfx940-PAD100: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec - ; gfx940-PAD100-NEXT: S_NOP 7 - ; gfx940-PAD100-NEXT: S_NOP 7 - ; gfx940-PAD100-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-PAD100-LABEL: name: mfma_padding_16_pass + ; gfx942-PAD100: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-PAD100-NEXT: S_NOP 7 + ; gfx942-PAD100-NEXT: S_NOP 7 + ; gfx942-PAD100-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec ... @@ -490,32 +490,32 @@ body: | ; gfx90a-PAD100-NEXT: S_NOP 3 ; gfx90a-PAD100-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec ; - ; gfx940-DEFAULT-LABEL: name: mfma_padding_16_pass_4_intervening_valu - ; gfx940-DEFAULT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec - ; gfx940-DEFAULT-NEXT: $vgpr2 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-DEFAULT-NEXT: $vgpr3 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-DEFAULT-NEXT: $vgpr4 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-DEFAULT-NEXT: $vgpr5 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-DEFAULT-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec - ; - ; gfx940-PAD50-LABEL: name: mfma_padding_16_pass_4_intervening_valu - ; gfx940-PAD50: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec - ; gfx940-PAD50-NEXT: $vgpr2 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD50-NEXT: $vgpr3 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD50-NEXT: $vgpr4 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD50-NEXT: $vgpr5 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD50-NEXT: S_NOP 3 - ; gfx940-PAD50-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec - ; - ; gfx940-PAD100-LABEL: name: mfma_padding_16_pass_4_intervening_valu - ; gfx940-PAD100: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec - ; gfx940-PAD100-NEXT: $vgpr2 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD100-NEXT: $vgpr3 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD100-NEXT: $vgpr4 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD100-NEXT: $vgpr5 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD100-NEXT: S_NOP 7 - ; gfx940-PAD100-NEXT: S_NOP 3 - ; gfx940-PAD100-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-DEFAULT-LABEL: name: mfma_padding_16_pass_4_intervening_valu + ; gfx942-DEFAULT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-DEFAULT-NEXT: $vgpr2 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-DEFAULT-NEXT: $vgpr3 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-DEFAULT-NEXT: $vgpr4 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-DEFAULT-NEXT: $vgpr5 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-DEFAULT-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; + ; gfx942-PAD50-LABEL: name: mfma_padding_16_pass_4_intervening_valu + ; gfx942-PAD50: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-PAD50-NEXT: $vgpr2 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD50-NEXT: $vgpr3 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD50-NEXT: $vgpr4 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD50-NEXT: $vgpr5 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD50-NEXT: S_NOP 3 + ; gfx942-PAD50-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; + ; gfx942-PAD100-LABEL: name: mfma_padding_16_pass_4_intervening_valu + ; gfx942-PAD100: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-PAD100-NEXT: $vgpr2 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD100-NEXT: $vgpr3 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD100-NEXT: $vgpr4 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD100-NEXT: $vgpr5 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD100-NEXT: S_NOP 7 + ; gfx942-PAD100-NEXT: S_NOP 3 + ; gfx942-PAD100-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec $vgpr2 = V_MOV_B32_e32 1, implicit $exec $vgpr3 = V_MOV_B32_e32 1, implicit $exec @@ -688,65 +688,65 @@ body: | ; gfx90a-PAD100-NEXT: $vgpr17 = V_MOV_B32_e32 1, implicit $exec ; gfx90a-PAD100-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec ; - ; gfx940-DEFAULT-LABEL: name: mfma_padding_16_pass_16_intervening_valu - ; gfx940-DEFAULT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec - ; gfx940-DEFAULT-NEXT: $vgpr2 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-DEFAULT-NEXT: $vgpr3 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-DEFAULT-NEXT: $vgpr4 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-DEFAULT-NEXT: $vgpr5 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-DEFAULT-NEXT: $vgpr6 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-DEFAULT-NEXT: $vgpr7 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-DEFAULT-NEXT: $vgpr8 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-DEFAULT-NEXT: $vgpr9 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-DEFAULT-NEXT: $vgpr10 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-DEFAULT-NEXT: $vgpr11 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-DEFAULT-NEXT: $vgpr12 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-DEFAULT-NEXT: $vgpr13 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-DEFAULT-NEXT: $vgpr14 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-DEFAULT-NEXT: $vgpr15 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-DEFAULT-NEXT: $vgpr16 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-DEFAULT-NEXT: $vgpr17 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-DEFAULT-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec - ; - ; gfx940-PAD50-LABEL: name: mfma_padding_16_pass_16_intervening_valu - ; gfx940-PAD50: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec - ; gfx940-PAD50-NEXT: $vgpr2 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD50-NEXT: $vgpr3 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD50-NEXT: $vgpr4 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD50-NEXT: $vgpr5 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD50-NEXT: $vgpr6 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD50-NEXT: $vgpr7 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD50-NEXT: $vgpr8 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD50-NEXT: $vgpr9 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD50-NEXT: $vgpr10 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD50-NEXT: $vgpr11 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD50-NEXT: $vgpr12 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD50-NEXT: $vgpr13 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD50-NEXT: $vgpr14 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD50-NEXT: $vgpr15 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD50-NEXT: $vgpr16 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD50-NEXT: $vgpr17 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD50-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec - ; - ; gfx940-PAD100-LABEL: name: mfma_padding_16_pass_16_intervening_valu - ; gfx940-PAD100: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec - ; gfx940-PAD100-NEXT: $vgpr2 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD100-NEXT: $vgpr3 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD100-NEXT: $vgpr4 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD100-NEXT: $vgpr5 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD100-NEXT: $vgpr6 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD100-NEXT: $vgpr7 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD100-NEXT: $vgpr8 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD100-NEXT: $vgpr9 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD100-NEXT: $vgpr10 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD100-NEXT: $vgpr11 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD100-NEXT: $vgpr12 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD100-NEXT: $vgpr13 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD100-NEXT: $vgpr14 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD100-NEXT: $vgpr15 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD100-NEXT: $vgpr16 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD100-NEXT: $vgpr17 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD100-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-DEFAULT-LABEL: name: mfma_padding_16_pass_16_intervening_valu + ; gfx942-DEFAULT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-DEFAULT-NEXT: $vgpr2 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-DEFAULT-NEXT: $vgpr3 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-DEFAULT-NEXT: $vgpr4 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-DEFAULT-NEXT: $vgpr5 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-DEFAULT-NEXT: $vgpr6 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-DEFAULT-NEXT: $vgpr7 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-DEFAULT-NEXT: $vgpr8 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-DEFAULT-NEXT: $vgpr9 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-DEFAULT-NEXT: $vgpr10 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-DEFAULT-NEXT: $vgpr11 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-DEFAULT-NEXT: $vgpr12 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-DEFAULT-NEXT: $vgpr13 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-DEFAULT-NEXT: $vgpr14 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-DEFAULT-NEXT: $vgpr15 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-DEFAULT-NEXT: $vgpr16 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-DEFAULT-NEXT: $vgpr17 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-DEFAULT-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; + ; gfx942-PAD50-LABEL: name: mfma_padding_16_pass_16_intervening_valu + ; gfx942-PAD50: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-PAD50-NEXT: $vgpr2 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD50-NEXT: $vgpr3 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD50-NEXT: $vgpr4 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD50-NEXT: $vgpr5 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD50-NEXT: $vgpr6 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD50-NEXT: $vgpr7 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD50-NEXT: $vgpr8 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD50-NEXT: $vgpr9 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD50-NEXT: $vgpr10 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD50-NEXT: $vgpr11 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD50-NEXT: $vgpr12 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD50-NEXT: $vgpr13 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD50-NEXT: $vgpr14 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD50-NEXT: $vgpr15 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD50-NEXT: $vgpr16 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD50-NEXT: $vgpr17 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD50-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; + ; gfx942-PAD100-LABEL: name: mfma_padding_16_pass_16_intervening_valu + ; gfx942-PAD100: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-PAD100-NEXT: $vgpr2 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD100-NEXT: $vgpr3 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD100-NEXT: $vgpr4 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD100-NEXT: $vgpr5 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD100-NEXT: $vgpr6 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD100-NEXT: $vgpr7 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD100-NEXT: $vgpr8 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD100-NEXT: $vgpr9 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD100-NEXT: $vgpr10 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD100-NEXT: $vgpr11 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD100-NEXT: $vgpr12 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD100-NEXT: $vgpr13 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD100-NEXT: $vgpr14 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD100-NEXT: $vgpr15 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD100-NEXT: $vgpr16 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD100-NEXT: $vgpr17 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD100-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec $vgpr2 = V_MOV_B32_e32 1, implicit $exec $vgpr3 = V_MOV_B32_e32 1, implicit $exec @@ -805,17 +805,17 @@ body: | ; gfx90a-PAD100: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec ; gfx90a-PAD100-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec ; - ; gfx940-DEFAULT-LABEL: name: mfma_padding_16_pass_occ_1 - ; gfx940-DEFAULT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec - ; gfx940-DEFAULT-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-DEFAULT-LABEL: name: mfma_padding_16_pass_occ_1 + ; gfx942-DEFAULT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-DEFAULT-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec ; - ; gfx940-PAD50-LABEL: name: mfma_padding_16_pass_occ_1 - ; gfx940-PAD50: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec - ; gfx940-PAD50-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-PAD50-LABEL: name: mfma_padding_16_pass_occ_1 + ; gfx942-PAD50: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-PAD50-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec ; - ; gfx940-PAD100-LABEL: name: mfma_padding_16_pass_occ_1 - ; gfx940-PAD100: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec - ; gfx940-PAD100-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-PAD100-LABEL: name: mfma_padding_16_pass_occ_1 + ; gfx942-PAD100: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-PAD100-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec ... @@ -960,56 +960,56 @@ body: | ; gfx90a-PAD100-NEXT: S_NOP 5 ; gfx90a-PAD100-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec ; - ; gfx940-DEFAULT-LABEL: name: mfma_padding_16_pass_2_preds - ; gfx940-DEFAULT: bb.0: - ; gfx940-DEFAULT-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; gfx940-DEFAULT-NEXT: {{ $}} - ; gfx940-DEFAULT-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec - ; gfx940-DEFAULT-NEXT: S_CBRANCH_VCCZ %bb.2, implicit undef $vcc - ; gfx940-DEFAULT-NEXT: {{ $}} - ; gfx940-DEFAULT-NEXT: bb.1: - ; gfx940-DEFAULT-NEXT: successors: %bb.2(0x80000000) - ; gfx940-DEFAULT-NEXT: {{ $}} - ; gfx940-DEFAULT-NEXT: $vgpr2 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-DEFAULT-NEXT: {{ $}} - ; gfx940-DEFAULT-NEXT: bb.2: - ; gfx940-DEFAULT-NEXT: $vgpr3 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-DEFAULT-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-DEFAULT-LABEL: name: mfma_padding_16_pass_2_preds + ; gfx942-DEFAULT: bb.0: + ; gfx942-DEFAULT-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; gfx942-DEFAULT-NEXT: {{ $}} + ; gfx942-DEFAULT-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-DEFAULT-NEXT: S_CBRANCH_VCCZ %bb.2, implicit undef $vcc + ; gfx942-DEFAULT-NEXT: {{ $}} + ; gfx942-DEFAULT-NEXT: bb.1: + ; gfx942-DEFAULT-NEXT: successors: %bb.2(0x80000000) + ; gfx942-DEFAULT-NEXT: {{ $}} + ; gfx942-DEFAULT-NEXT: $vgpr2 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-DEFAULT-NEXT: {{ $}} + ; gfx942-DEFAULT-NEXT: bb.2: + ; gfx942-DEFAULT-NEXT: $vgpr3 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-DEFAULT-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec ; - ; gfx940-PAD50-LABEL: name: mfma_padding_16_pass_2_preds - ; gfx940-PAD50: bb.0: - ; gfx940-PAD50-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; gfx940-PAD50-NEXT: {{ $}} - ; gfx940-PAD50-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec - ; gfx940-PAD50-NEXT: S_CBRANCH_VCCZ %bb.2, implicit undef $vcc - ; gfx940-PAD50-NEXT: {{ $}} - ; gfx940-PAD50-NEXT: bb.1: - ; gfx940-PAD50-NEXT: successors: %bb.2(0x80000000) - ; gfx940-PAD50-NEXT: {{ $}} - ; gfx940-PAD50-NEXT: $vgpr2 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD50-NEXT: {{ $}} - ; gfx940-PAD50-NEXT: bb.2: - ; gfx940-PAD50-NEXT: $vgpr3 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD50-NEXT: S_NOP 5 - ; gfx940-PAD50-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-PAD50-LABEL: name: mfma_padding_16_pass_2_preds + ; gfx942-PAD50: bb.0: + ; gfx942-PAD50-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; gfx942-PAD50-NEXT: {{ $}} + ; gfx942-PAD50-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-PAD50-NEXT: S_CBRANCH_VCCZ %bb.2, implicit undef $vcc + ; gfx942-PAD50-NEXT: {{ $}} + ; gfx942-PAD50-NEXT: bb.1: + ; gfx942-PAD50-NEXT: successors: %bb.2(0x80000000) + ; gfx942-PAD50-NEXT: {{ $}} + ; gfx942-PAD50-NEXT: $vgpr2 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD50-NEXT: {{ $}} + ; gfx942-PAD50-NEXT: bb.2: + ; gfx942-PAD50-NEXT: $vgpr3 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD50-NEXT: S_NOP 5 + ; gfx942-PAD50-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec ; - ; gfx940-PAD100-LABEL: name: mfma_padding_16_pass_2_preds - ; gfx940-PAD100: bb.0: - ; gfx940-PAD100-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; gfx940-PAD100-NEXT: {{ $}} - ; gfx940-PAD100-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec - ; gfx940-PAD100-NEXT: S_CBRANCH_VCCZ %bb.2, implicit undef $vcc - ; gfx940-PAD100-NEXT: {{ $}} - ; gfx940-PAD100-NEXT: bb.1: - ; gfx940-PAD100-NEXT: successors: %bb.2(0x80000000) - ; gfx940-PAD100-NEXT: {{ $}} - ; gfx940-PAD100-NEXT: $vgpr2 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD100-NEXT: {{ $}} - ; gfx940-PAD100-NEXT: bb.2: - ; gfx940-PAD100-NEXT: $vgpr3 = V_MOV_B32_e32 1, implicit $exec - ; gfx940-PAD100-NEXT: S_NOP 7 - ; gfx940-PAD100-NEXT: S_NOP 5 - ; gfx940-PAD100-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-PAD100-LABEL: name: mfma_padding_16_pass_2_preds + ; gfx942-PAD100: bb.0: + ; gfx942-PAD100-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; gfx942-PAD100-NEXT: {{ $}} + ; gfx942-PAD100-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + ; gfx942-PAD100-NEXT: S_CBRANCH_VCCZ %bb.2, implicit undef $vcc + ; gfx942-PAD100-NEXT: {{ $}} + ; gfx942-PAD100-NEXT: bb.1: + ; gfx942-PAD100-NEXT: successors: %bb.2(0x80000000) + ; gfx942-PAD100-NEXT: {{ $}} + ; gfx942-PAD100-NEXT: $vgpr2 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD100-NEXT: {{ $}} + ; gfx942-PAD100-NEXT: bb.2: + ; gfx942-PAD100-NEXT: $vgpr3 = V_MOV_B32_e32 1, implicit $exec + ; gfx942-PAD100-NEXT: S_NOP 7 + ; gfx942-PAD100-NEXT: S_NOP 5 + ; gfx942-PAD100-NEXT: early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec bb.0: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec S_CBRANCH_VCCZ %bb.2, implicit undef $vcc diff --git a/llvm/test/CodeGen/AMDGPU/packed-fp32.ll b/llvm/test/CodeGen/AMDGPU/packed-fp32.ll index 0833dada43e4d..b59f3c0d410f8 100644 --- a/llvm/test/CodeGen/AMDGPU/packed-fp32.ll +++ b/llvm/test/CodeGen/AMDGPU/packed-fp32.ll @@ -1,8 +1,8 @@ ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX900 %s ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,PACKED-SDAG %s ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,PACKED-GISEL %s -; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,PACKED-SDAG %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,PACKED-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,PACKED-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,PACKED-GISEL %s ; GCN-LABEL: {{^}}fadd_v2_vv: ; GFX900-COUNT-2: v_add_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} diff --git a/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir b/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir index b1ec70d89fa43..d070a8ef5dd2d 100644 --- a/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir +++ b/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -verify-machineinstrs -run-pass peephole-opt -o - %s | FileCheck -check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx942 -verify-machineinstrs -run-pass peephole-opt -o - %s | FileCheck -check-prefix=GCN %s --- name: fold_simm_virtual diff --git a/llvm/test/CodeGen/AMDGPU/peephole-opt-fold-reg-sequence-subreg.mir b/llvm/test/CodeGen/AMDGPU/peephole-opt-fold-reg-sequence-subreg.mir index 057769372c041..b98cb5d037918 100644 --- a/llvm/test/CodeGen/AMDGPU/peephole-opt-fold-reg-sequence-subreg.mir +++ b/llvm/test/CodeGen/AMDGPU/peephole-opt-fold-reg-sequence-subreg.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 -# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -run-pass=peephole-opt -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx942 -run-pass=peephole-opt -o - %s | FileCheck %s --- name: reg_sequence_extract_subreg_sub0_from_regsequence_sub0_sub1 diff --git a/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-IR-lowering.ll b/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-IR-lowering.ll index aeb7faade4715..85839bc472dcf 100644 --- a/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-IR-lowering.ll +++ b/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-IR-lowering.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -passes='amdgpu-attributor,function(amdgpu-lower-kernel-arguments)' -S < %s | FileCheck -check-prefix=NO-PRELOAD %s -; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -passes='amdgpu-attributor,function(amdgpu-lower-kernel-arguments)' -amdgpu-kernarg-preload-count=16 -S < %s | FileCheck -check-prefix=PRELOAD %s +; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes='amdgpu-attributor,function(amdgpu-lower-kernel-arguments)' -S < %s | FileCheck -check-prefix=NO-PRELOAD %s +; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes='amdgpu-attributor,function(amdgpu-lower-kernel-arguments)' -amdgpu-kernarg-preload-count=16 -S < %s | FileCheck -check-prefix=PRELOAD %s define amdgpu_kernel void @preload_block_count_x(ptr addrspace(1) %out) { ; NO-PRELOAD-LABEL: define amdgpu_kernel void @preload_block_count_x( diff --git a/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs.ll b/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs.ll index 31beb7a3cce24..56523ea9761cd 100644 --- a/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs.ll +++ b/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs.ll @@ -1,21 +1,21 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940 %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX942 %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90a %s define amdgpu_kernel void @preload_block_count_x(ptr addrspace(1) inreg %out) #0 { -; GFX940-LABEL: preload_block_count_x: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dword s4, s[0:1], 0x8 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB0_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB0_0: -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: preload_block_count_x: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dword s4, s[0:1], 0x8 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB0_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB0_0: +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, s4 +; GFX942-NEXT: global_store_dword v0, v1, s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: preload_block_count_x: ; GFX90a: ; %bb.1: @@ -37,20 +37,20 @@ define amdgpu_kernel void @preload_block_count_x(ptr addrspace(1) inreg %out) #0 } define amdgpu_kernel void @preload_unused_arg_block_count_x(ptr addrspace(1) inreg %out, i32 inreg) #0 { -; GFX940-LABEL: preload_unused_arg_block_count_x: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x8 -; GFX940-NEXT: s_load_dword s6, s[0:1], 0x10 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB1_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB1_0: -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, s6 -; GFX940-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: preload_unused_arg_block_count_x: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x8 +; GFX942-NEXT: s_load_dword s6, s[0:1], 0x10 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB1_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB1_0: +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, s6 +; GFX942-NEXT: global_store_dword v0, v1, s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: preload_unused_arg_block_count_x: ; GFX90a: ; %bb.1: @@ -73,20 +73,20 @@ define amdgpu_kernel void @preload_unused_arg_block_count_x(ptr addrspace(1) inr } define amdgpu_kernel void @no_free_sgprs_block_count_x(ptr addrspace(1) inreg %out, i256 inreg) { -; GFX940-LABEL: no_free_sgprs_block_count_x: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB2_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB2_0: -; GFX940-NEXT: s_load_dword s0, s[4:5], 0x28 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NEXT: global_store_dword v0, v1, s[8:9] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: no_free_sgprs_block_count_x: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB2_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB2_0: +; GFX942-NEXT: s_load_dword s0, s[4:5], 0x28 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NEXT: global_store_dword v0, v1, s[8:9] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: no_free_sgprs_block_count_x: ; GFX90a: ; %bb.1: @@ -109,15 +109,15 @@ define amdgpu_kernel void @no_free_sgprs_block_count_x(ptr addrspace(1) inreg %o } define amdgpu_kernel void @no_inreg_block_count_x(ptr addrspace(1) %out) #0 { -; GFX940-LABEL: no_inreg_block_count_x: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_load_dword s4, s[0:1], 0x8 -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: no_inreg_block_count_x: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_load_dword s4, s[0:1], 0x8 +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, s4 +; GFX942-NEXT: global_store_dword v0, v1, s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: no_inreg_block_count_x: ; GFX90a: ; %bb.0: @@ -138,15 +138,15 @@ define amdgpu_kernel void @no_inreg_block_count_x(ptr addrspace(1) %out) #0 { ; args are inreg (preloaded). define amdgpu_kernel void @mixed_inreg_block_count_x(ptr addrspace(1) %out, i32 inreg) #0 { -; GFX940-LABEL: mixed_inreg_block_count_x: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_load_dword s4, s[0:1], 0x10 -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: mixed_inreg_block_count_x: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_load_dword s4, s[0:1], 0x10 +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, s4 +; GFX942-NEXT: global_store_dword v0, v1, s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: mixed_inreg_block_count_x: ; GFX90a: ; %bb.0: @@ -164,20 +164,20 @@ define amdgpu_kernel void @mixed_inreg_block_count_x(ptr addrspace(1) %out, i32 } define amdgpu_kernel void @incorrect_type_i64_block_count_x(ptr addrspace(1) inreg %out) #0 { -; GFX940-LABEL: incorrect_type_i64_block_count_x: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB5_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB5_0: -; GFX940-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: incorrect_type_i64_block_count_x: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB5_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB5_0: +; GFX942-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: incorrect_type_i64_block_count_x: ; GFX90a: ; %bb.1: @@ -200,20 +200,20 @@ define amdgpu_kernel void @incorrect_type_i64_block_count_x(ptr addrspace(1) inr } define amdgpu_kernel void @incorrect_type_i16_block_count_x(ptr addrspace(1) inreg %out) #0 { -; GFX940-LABEL: incorrect_type_i16_block_count_x: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB6_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB6_0: -; GFX940-NEXT: s_load_dword s0, s[0:1], 0x8 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NEXT: global_store_short v0, v1, s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: incorrect_type_i16_block_count_x: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB6_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB6_0: +; GFX942-NEXT: s_load_dword s0, s[0:1], 0x8 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NEXT: global_store_short v0, v1, s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: incorrect_type_i16_block_count_x: ; GFX90a: ; %bb.1: @@ -236,19 +236,19 @@ define amdgpu_kernel void @incorrect_type_i16_block_count_x(ptr addrspace(1) inr } define amdgpu_kernel void @preload_block_count_y(ptr addrspace(1) inreg %out) #0 { -; GFX940-LABEL: preload_block_count_y: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x8 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB7_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB7_0: -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, s5 -; GFX940-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: preload_block_count_y: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x8 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB7_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB7_0: +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, s5 +; GFX942-NEXT: global_store_dword v0, v1, s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: preload_block_count_y: ; GFX90a: ; %bb.1: @@ -271,21 +271,21 @@ define amdgpu_kernel void @preload_block_count_y(ptr addrspace(1) inreg %out) #0 } define amdgpu_kernel void @random_incorrect_offset(ptr addrspace(1) inreg %out) #0 { -; GFX940-LABEL: random_incorrect_offset: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB8_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB8_0: -; GFX940-NEXT: s_mov_b32 s4, 8 -; GFX940-NEXT: s_load_dword s0, s[0:1], s4 offset:0x2 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: random_incorrect_offset: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB8_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB8_0: +; GFX942-NEXT: s_mov_b32 s4, 8 +; GFX942-NEXT: s_load_dword s0, s[0:1], s4 offset:0x2 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NEXT: global_store_dword v0, v1, s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: random_incorrect_offset: ; GFX90a: ; %bb.1: @@ -310,20 +310,20 @@ define amdgpu_kernel void @random_incorrect_offset(ptr addrspace(1) inreg %out) } define amdgpu_kernel void @preload_block_count_z(ptr addrspace(1) inreg %out) #0 { -; GFX940-LABEL: preload_block_count_z: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x8 -; GFX940-NEXT: s_load_dword s6, s[0:1], 0x10 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB9_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB9_0: -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, s6 -; GFX940-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: preload_block_count_z: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x8 +; GFX942-NEXT: s_load_dword s6, s[0:1], 0x10 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB9_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB9_0: +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, s6 +; GFX942-NEXT: global_store_dword v0, v1, s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: preload_block_count_z: ; GFX90a: ; %bb.1: @@ -347,22 +347,22 @@ define amdgpu_kernel void @preload_block_count_z(ptr addrspace(1) inreg %out) #0 } define amdgpu_kernel void @preload_block_count_x_imparg_align_ptr_i8(ptr addrspace(1) inreg %out, i8 inreg %val) #0 { -; GFX940-LABEL: preload_block_count_x_imparg_align_ptr_i8: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x8 -; GFX940-NEXT: s_load_dword s6, s[0:1], 0x10 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB10_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB10_0: -; GFX940-NEXT: s_and_b32 s0, s4, 0xff -; GFX940-NEXT: s_add_i32 s0, s6, s0 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: preload_block_count_x_imparg_align_ptr_i8: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x8 +; GFX942-NEXT: s_load_dword s6, s[0:1], 0x10 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB10_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB10_0: +; GFX942-NEXT: s_and_b32 s0, s4, 0xff +; GFX942-NEXT: s_add_i32 s0, s6, s0 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NEXT: global_store_dword v0, v1, s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: preload_block_count_x_imparg_align_ptr_i8: ; GFX90a: ; %bb.1: @@ -389,22 +389,22 @@ define amdgpu_kernel void @preload_block_count_x_imparg_align_ptr_i8(ptr addrspa } define amdgpu_kernel void @preload_block_count_xyz(ptr addrspace(1) inreg %out) #0 { -; GFX940-LABEL: preload_block_count_xyz: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x8 -; GFX940-NEXT: s_load_dword s6, s[0:1], 0x10 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB11_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB11_0: -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, s4 -; GFX940-NEXT: v_mov_b32_e32 v1, s5 -; GFX940-NEXT: v_mov_b32_e32 v2, s6 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: preload_block_count_xyz: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x8 +; GFX942-NEXT: s_load_dword s6, s[0:1], 0x10 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB11_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB11_0: +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, s4 +; GFX942-NEXT: v_mov_b32_e32 v1, s5 +; GFX942-NEXT: v_mov_b32_e32 v2, s6 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: preload_block_count_xyz: ; GFX90a: ; %bb.1: @@ -437,20 +437,20 @@ define amdgpu_kernel void @preload_block_count_xyz(ptr addrspace(1) inreg %out) } define amdgpu_kernel void @preload_workgroup_size_x(ptr addrspace(1) inreg %out) #0 { -; GFX940-LABEL: preload_workgroup_size_x: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB12_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB12_0: -; GFX940-NEXT: s_and_b32 s0, s7, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: preload_workgroup_size_x: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB12_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB12_0: +; GFX942-NEXT: s_and_b32 s0, s7, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NEXT: global_store_dword v0, v1, s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: preload_workgroup_size_x: ; GFX90a: ; %bb.1: @@ -475,20 +475,20 @@ define amdgpu_kernel void @preload_workgroup_size_x(ptr addrspace(1) inreg %out) } define amdgpu_kernel void @preload_workgroup_size_y(ptr addrspace(1) inreg %out) #0 { -; GFX940-LABEL: preload_workgroup_size_y: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB13_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB13_0: -; GFX940-NEXT: s_lshr_b32 s0, s7, 16 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: preload_workgroup_size_y: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB13_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB13_0: +; GFX942-NEXT: s_lshr_b32 s0, s7, 16 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NEXT: global_store_dword v0, v1, s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: preload_workgroup_size_y: ; GFX90a: ; %bb.1: @@ -513,21 +513,21 @@ define amdgpu_kernel void @preload_workgroup_size_y(ptr addrspace(1) inreg %out) } define amdgpu_kernel void @preload_workgroup_size_z(ptr addrspace(1) inreg %out) #0 { -; GFX940-LABEL: preload_workgroup_size_z: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 -; GFX940-NEXT: s_load_dword s8, s[0:1], 0x18 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB14_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB14_0: -; GFX940-NEXT: s_and_b32 s0, s8, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: preload_workgroup_size_z: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 +; GFX942-NEXT: s_load_dword s8, s[0:1], 0x18 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB14_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB14_0: +; GFX942-NEXT: s_and_b32 s0, s8, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NEXT: global_store_dword v0, v1, s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: preload_workgroup_size_z: ; GFX90a: ; %bb.1: @@ -553,25 +553,25 @@ define amdgpu_kernel void @preload_workgroup_size_z(ptr addrspace(1) inreg %out) } define amdgpu_kernel void @preload_workgroup_size_xyz(ptr addrspace(1) inreg %out) #0 { -; GFX940-LABEL: preload_workgroup_size_xyz: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 -; GFX940-NEXT: s_load_dword s8, s[0:1], 0x18 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB15_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB15_0: -; GFX940-NEXT: s_lshr_b32 s0, s7, 16 -; GFX940-NEXT: s_and_b32 s1, s7, 0xffff -; GFX940-NEXT: s_and_b32 s4, s8, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NEXT: v_mov_b32_e32 v2, s4 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: preload_workgroup_size_xyz: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 +; GFX942-NEXT: s_load_dword s8, s[0:1], 0x18 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB15_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB15_0: +; GFX942-NEXT: s_lshr_b32 s0, s7, 16 +; GFX942-NEXT: s_and_b32 s1, s7, 0xffff +; GFX942-NEXT: s_and_b32 s4, s8, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NEXT: v_mov_b32_e32 v2, s4 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: preload_workgroup_size_xyz: ; GFX90a: ; %bb.1: @@ -610,21 +610,21 @@ define amdgpu_kernel void @preload_workgroup_size_xyz(ptr addrspace(1) inreg %ou } define amdgpu_kernel void @preload_remainder_x(ptr addrspace(1) inreg %out) #0 { -; GFX940-LABEL: preload_remainder_x: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 -; GFX940-NEXT: s_load_dword s8, s[0:1], 0x18 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB16_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB16_0: -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: preload_remainder_x: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 +; GFX942-NEXT: s_load_dword s8, s[0:1], 0x18 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB16_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB16_0: +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NEXT: global_store_dword v0, v1, s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: preload_remainder_x: ; GFX90a: ; %bb.1: @@ -650,21 +650,21 @@ define amdgpu_kernel void @preload_remainder_x(ptr addrspace(1) inreg %out) #0 { } define amdgpu_kernel void @preloadremainder_y(ptr addrspace(1) inreg %out) #0 { -; GFX940-LABEL: preloadremainder_y: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 -; GFX940-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x18 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB17_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB17_0: -; GFX940-NEXT: s_and_b32 s0, s9, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: preloadremainder_y: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 +; GFX942-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x18 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB17_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB17_0: +; GFX942-NEXT: s_and_b32 s0, s9, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NEXT: global_store_dword v0, v1, s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: preloadremainder_y: ; GFX90a: ; %bb.1: @@ -690,21 +690,21 @@ define amdgpu_kernel void @preloadremainder_y(ptr addrspace(1) inreg %out) #0 { } define amdgpu_kernel void @preloadremainder_z(ptr addrspace(1) inreg %out) #0 { -; GFX940-LABEL: preloadremainder_z: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 -; GFX940-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x18 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB18_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB18_0: -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: preloadremainder_z: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 +; GFX942-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x18 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB18_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB18_0: +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NEXT: global_store_dword v0, v1, s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: preloadremainder_z: ; GFX90a: ; %bb.1: @@ -730,25 +730,25 @@ define amdgpu_kernel void @preloadremainder_z(ptr addrspace(1) inreg %out) #0 { } define amdgpu_kernel void @preloadremainder_xyz(ptr addrspace(1) inreg %out) #0 { -; GFX940-LABEL: preloadremainder_xyz: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 -; GFX940-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x18 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB19_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB19_0: -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: s_lshr_b32 s1, s8, 16 -; GFX940-NEXT: s_and_b32 s4, s9, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: preloadremainder_xyz: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 +; GFX942-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x18 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB19_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB19_0: +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: s_lshr_b32 s1, s8, 16 +; GFX942-NEXT: s_and_b32 s4, s9, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NEXT: v_mov_b32_e32 v1, s4 +; GFX942-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: preloadremainder_xyz: ; GFX90a: ; %bb.1: @@ -787,19 +787,19 @@ define amdgpu_kernel void @preloadremainder_xyz(ptr addrspace(1) inreg %out) #0 } define amdgpu_kernel void @no_free_sgprs_preloadremainder_z(ptr addrspace(1) inreg %out) { -; GFX940-LABEL: no_free_sgprs_preloadremainder_z: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB20_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB20_0: -; GFX940-NEXT: s_lshr_b32 s0, s15, 16 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NEXT: global_store_dword v0, v1, s[8:9] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: no_free_sgprs_preloadremainder_z: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB20_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB20_0: +; GFX942-NEXT: s_lshr_b32 s0, s15, 16 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NEXT: global_store_dword v0, v1, s[8:9] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: no_free_sgprs_preloadremainder_z: ; GFX90a: ; %bb.1: @@ -827,20 +827,20 @@ define amdgpu_kernel void @no_free_sgprs_preloadremainder_z(ptr addrspace(1) inr ; Check for consistency between isel and earlier passes preload SGPR accounting with max preload SGPRs. define amdgpu_kernel void @preload_block_max_user_sgprs(ptr addrspace(1) inreg %out, i192 inreg %t0, i32 inreg %t1) #0 { -; GFX940-LABEL: preload_block_max_user_sgprs: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x8 -; GFX940-NEXT: s_load_dword s12, s[0:1], 0x28 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB21_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB21_0: -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, s12 -; GFX940-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: preload_block_max_user_sgprs: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x8 +; GFX942-NEXT: s_load_dword s12, s[0:1], 0x28 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB21_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB21_0: +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, s12 +; GFX942-NEXT: global_store_dword v0, v1, s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: preload_block_max_user_sgprs: ; GFX90a: ; %bb.1: @@ -866,24 +866,24 @@ define amdgpu_kernel void @preload_block_max_user_sgprs(ptr addrspace(1) inreg % } define amdgpu_kernel void @preload_block_count_z_workgroup_size_z_remainder_z(ptr addrspace(1) inreg %out) #0 { -; GFX940-LABEL: preload_block_count_z_workgroup_size_z_remainder_z: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 -; GFX940-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x18 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB22_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB22_0: -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: s_and_b32 s1, s8, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, s6 -; GFX940-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: preload_block_count_z_workgroup_size_z_remainder_z: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 +; GFX942-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x18 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB22_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB22_0: +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: s_and_b32 s1, s8, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, s6 +; GFX942-NEXT: v_mov_b32_e32 v1, s1 +; GFX942-NEXT: v_mov_b32_e32 v2, s0 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: preload_block_count_z_workgroup_size_z_remainder_z: ; GFX90a: ; %bb.1: diff --git a/llvm/test/CodeGen/AMDGPU/preload-kernargs-IR-lowering.ll b/llvm/test/CodeGen/AMDGPU/preload-kernargs-IR-lowering.ll index ab0fb7584d50c..91bfedd46e6fa 100644 --- a/llvm/test/CodeGen/AMDGPU/preload-kernargs-IR-lowering.ll +++ b/llvm/test/CodeGen/AMDGPU/preload-kernargs-IR-lowering.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature -; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-attributor -amdgpu-lower-kernel-arguments -S < %s | FileCheck -check-prefix=NO-PRELOAD %s -; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-attributor -amdgpu-lower-kernel-arguments -amdgpu-kernarg-preload-count=1 -S < %s | FileCheck -check-prefix=PRELOAD-1 %s -; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-attributor -amdgpu-lower-kernel-arguments -amdgpu-kernarg-preload-count=3 -S < %s | FileCheck -check-prefix=PRELOAD-3 %s -; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-attributor -amdgpu-lower-kernel-arguments -amdgpu-kernarg-preload-count=8 -S < %s | FileCheck -check-prefix=PRELOAD-8 %s +; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -amdgpu-attributor -amdgpu-lower-kernel-arguments -S < %s | FileCheck -check-prefix=NO-PRELOAD %s +; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -amdgpu-attributor -amdgpu-lower-kernel-arguments -amdgpu-kernarg-preload-count=1 -S < %s | FileCheck -check-prefix=PRELOAD-1 %s +; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -amdgpu-attributor -amdgpu-lower-kernel-arguments -amdgpu-kernarg-preload-count=3 -S < %s | FileCheck -check-prefix=PRELOAD-3 %s +; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -amdgpu-attributor -amdgpu-lower-kernel-arguments -amdgpu-kernarg-preload-count=8 -S < %s | FileCheck -check-prefix=PRELOAD-8 %s define amdgpu_kernel void @test_preload_IR_lowering_kernel_2(ptr addrspace(1) %in, ptr addrspace(1) %out) #0 { ; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_IR_lowering_kernel_2 diff --git a/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll b/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll index 0f60888bcb2f5..436116f3f72a5 100644 --- a/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll +++ b/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll @@ -1,23 +1,23 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940 %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX942 %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90a %s define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) inreg %out, i8 inreg %arg0) #0 { -; GFX940-LABEL: ptr1_i8: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dword s4, s[0:1], 0x8 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB0_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB0_0: -; GFX940-NEXT: s_and_b32 s0, s4, 0xff -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: ptr1_i8: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dword s4, s[0:1], 0x8 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB0_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB0_0: +; GFX942-NEXT: s_and_b32 s0, s4, 0xff +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NEXT: global_store_dword v0, v1, s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: ptr1_i8: ; GFX90a: ; %bb.1: @@ -39,20 +39,20 @@ define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) inreg %out, i8 inreg %arg0) } define amdgpu_kernel void @ptr1_i8_zext_arg(ptr addrspace(1) inreg %out, i8 zeroext inreg %arg0) #0 { -; GFX940-LABEL: ptr1_i8_zext_arg: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dword s4, s[0:1], 0x8 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB1_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB1_0: -; GFX940-NEXT: s_and_b32 s0, s4, 0xff -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: ptr1_i8_zext_arg: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dword s4, s[0:1], 0x8 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB1_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB1_0: +; GFX942-NEXT: s_and_b32 s0, s4, 0xff +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NEXT: global_store_dword v0, v1, s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: ptr1_i8_zext_arg: ; GFX90a: ; %bb.1: @@ -74,20 +74,20 @@ define amdgpu_kernel void @ptr1_i8_zext_arg(ptr addrspace(1) inreg %out, i8 zero } define amdgpu_kernel void @ptr1_i16_preload_arg(ptr addrspace(1) inreg %out, i16 inreg %arg0) #0 { -; GFX940-LABEL: ptr1_i16_preload_arg: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dword s4, s[0:1], 0x8 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB2_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB2_0: -; GFX940-NEXT: s_and_b32 s0, s4, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: ptr1_i16_preload_arg: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dword s4, s[0:1], 0x8 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB2_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB2_0: +; GFX942-NEXT: s_and_b32 s0, s4, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NEXT: global_store_dword v0, v1, s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: ptr1_i16_preload_arg: ; GFX90a: ; %bb.1: @@ -109,19 +109,19 @@ define amdgpu_kernel void @ptr1_i16_preload_arg(ptr addrspace(1) inreg %out, i16 } define amdgpu_kernel void @ptr1_i32_preload_arg(ptr addrspace(1) inreg %out, i32 inreg %arg0) #0 { -; GFX940-LABEL: ptr1_i32_preload_arg: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dword s4, s[0:1], 0x8 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB3_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB3_0: -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: ptr1_i32_preload_arg: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dword s4, s[0:1], 0x8 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB3_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB3_0: +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, s4 +; GFX942-NEXT: global_store_dword v0, v1, s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: ptr1_i32_preload_arg: ; GFX90a: ; %bb.1: @@ -142,21 +142,21 @@ define amdgpu_kernel void @ptr1_i32_preload_arg(ptr addrspace(1) inreg %out, i32 define amdgpu_kernel void @i32_ptr1_i32_preload_arg(i32 inreg %arg0, ptr addrspace(1) inreg %out, i32 inreg %arg1) #0 { -; GFX940-LABEL: i32_ptr1_i32_preload_arg: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x8 -; GFX940-NEXT: s_load_dword s6, s[0:1], 0x10 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB4_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB4_0: -; GFX940-NEXT: s_add_i32 s0, s2, s6 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NEXT: global_store_dword v0, v1, s[4:5] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: i32_ptr1_i32_preload_arg: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x8 +; GFX942-NEXT: s_load_dword s6, s[0:1], 0x10 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB4_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB4_0: +; GFX942-NEXT: s_add_i32 s0, s2, s6 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NEXT: global_store_dword v0, v1, s[4:5] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: i32_ptr1_i32_preload_arg: ; GFX90a: ; %bb.1: @@ -179,22 +179,22 @@ define amdgpu_kernel void @i32_ptr1_i32_preload_arg(i32 inreg %arg0, ptr addrspa } define amdgpu_kernel void @ptr1_i16_i16_preload_arg(ptr addrspace(1) inreg %out, i16 inreg %arg0, i16 inreg %arg1) #0 { -; GFX940-LABEL: ptr1_i16_i16_preload_arg: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dword s4, s[0:1], 0x8 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB5_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB5_0: -; GFX940-NEXT: s_lshr_b32 s0, s4, 16 -; GFX940-NEXT: s_and_b32 s1, s4, 0xffff -; GFX940-NEXT: s_add_i32 s0, s1, s0 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: ptr1_i16_i16_preload_arg: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dword s4, s[0:1], 0x8 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB5_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB5_0: +; GFX942-NEXT: s_lshr_b32 s0, s4, 16 +; GFX942-NEXT: s_and_b32 s1, s4, 0xffff +; GFX942-NEXT: s_add_i32 s0, s1, s0 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NEXT: global_store_dword v0, v1, s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: ptr1_i16_i16_preload_arg: ; GFX90a: ; %bb.1: @@ -220,19 +220,19 @@ define amdgpu_kernel void @ptr1_i16_i16_preload_arg(ptr addrspace(1) inreg %out, } define amdgpu_kernel void @ptr1_v2i8_preload_arg(ptr addrspace(1) inreg %out, <2 x i8> inreg %in) #0 { -; GFX940-LABEL: ptr1_v2i8_preload_arg: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dword s4, s[0:1], 0x8 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB6_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB6_0: -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-NEXT: global_store_short v0, v1, s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: ptr1_v2i8_preload_arg: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dword s4, s[0:1], 0x8 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB6_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB6_0: +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, s4 +; GFX942-NEXT: global_store_short v0, v1, s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: ptr1_v2i8_preload_arg: ; GFX90a: ; %bb.1: @@ -253,24 +253,24 @@ define amdgpu_kernel void @ptr1_v2i8_preload_arg(ptr addrspace(1) inreg %out, <2 define amdgpu_kernel void @byref_preload_arg(ptr addrspace(1) inreg %out, ptr addrspace(4) byref(i32) align(256) %in.byref, i32 %after.offset) #0 { -; GFX940-LABEL: byref_preload_arg: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB7_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB7_0: -; GFX940-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x100 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-NEXT: v_mov_b32_e32 v2, s5 -; GFX940-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dword v0, v2, s[2:3] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: byref_preload_arg: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB7_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB7_0: +; GFX942-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x100 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, s4 +; GFX942-NEXT: v_mov_b32_e32 v2, s5 +; GFX942-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: global_store_dword v0, v2, s[2:3] sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: byref_preload_arg: ; GFX90a: ; %bb.1: @@ -299,24 +299,24 @@ define amdgpu_kernel void @byref_preload_arg(ptr addrspace(1) inreg %out, ptr ad ; The second argument is not expected to be preloaded with the current behavior. define amdgpu_kernel void @byref_staggered_preload_arg(ptr addrspace(1) inreg %out, ptr addrspace(4) byref(i32) align(256) %in.byref, i32 inreg %after.offset) #0 { -; GFX940-LABEL: byref_staggered_preload_arg: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB8_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB8_0: -; GFX940-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x100 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-NEXT: v_mov_b32_e32 v2, s5 -; GFX940-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dword v0, v2, s[2:3] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: byref_staggered_preload_arg: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB8_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB8_0: +; GFX942-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x100 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, s4 +; GFX942-NEXT: v_mov_b32_e32 v2, s5 +; GFX942-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: global_store_dword v0, v2, s[2:3] sc0 sc1 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: byref_staggered_preload_arg: ; GFX90a: ; %bb.1: @@ -344,29 +344,29 @@ define amdgpu_kernel void @byref_staggered_preload_arg(ptr addrspace(1) inreg %o define amdgpu_kernel void @v8i32_arg(ptr addrspace(1) nocapture inreg %out, <8 x i32> inreg %in) #0 { -; GFX940-LABEL: v8i32_arg: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB9_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB9_0: -; GFX940-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x20 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, s8 -; GFX940-NEXT: v_mov_b32_e32 v1, s9 -; GFX940-NEXT: v_mov_b32_e32 v2, s10 -; GFX940-NEXT: v_mov_b32_e32 v3, s11 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, s4 -; GFX940-NEXT: v_mov_b32_e32 v1, s5 -; GFX940-NEXT: v_mov_b32_e32 v2, s6 -; GFX940-NEXT: v_mov_b32_e32 v3, s7 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: v8i32_arg: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB9_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB9_0: +; GFX942-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x20 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, s8 +; GFX942-NEXT: v_mov_b32_e32 v1, s9 +; GFX942-NEXT: v_mov_b32_e32 v2, s10 +; GFX942-NEXT: v_mov_b32_e32 v3, s11 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v0, s4 +; GFX942-NEXT: v_mov_b32_e32 v1, s5 +; GFX942-NEXT: v_mov_b32_e32 v2, s6 +; GFX942-NEXT: v_mov_b32_e32 v3, s7 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: v8i32_arg: ; GFX90a: ; %bb.1: @@ -396,21 +396,21 @@ define amdgpu_kernel void @v8i32_arg(ptr addrspace(1) nocapture inreg %out, <8 x } define amdgpu_kernel void @v3i16_preload_arg(ptr addrspace(1) nocapture inreg %out, <3 x i16> inreg %in) #0 { -; GFX940-LABEL: v3i16_preload_arg: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x8 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB10_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB10_0: -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, s5 -; GFX940-NEXT: global_store_short v0, v1, s[2:3] offset:4 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: v3i16_preload_arg: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x8 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB10_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB10_0: +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, s5 +; GFX942-NEXT: global_store_short v0, v1, s[2:3] offset:4 +; GFX942-NEXT: v_mov_b32_e32 v1, s4 +; GFX942-NEXT: global_store_dword v0, v1, s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: v3i16_preload_arg: ; GFX90a: ; %bb.1: @@ -432,22 +432,22 @@ define amdgpu_kernel void @v3i16_preload_arg(ptr addrspace(1) nocapture inreg %o } define amdgpu_kernel void @v3i32_preload_arg(ptr addrspace(1) nocapture inreg %out, <3 x i32> inreg %in) #0 { -; GFX940-LABEL: v3i32_preload_arg: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 -; GFX940-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x18 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB11_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB11_0: -; GFX940-NEXT: v_mov_b32_e32 v0, s6 -; GFX940-NEXT: v_mov_b32_e32 v1, s7 -; GFX940-NEXT: v_mov_b32_e32 v2, s8 -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: v3i32_preload_arg: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 +; GFX942-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x18 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB11_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB11_0: +; GFX942-NEXT: v_mov_b32_e32 v0, s6 +; GFX942-NEXT: v_mov_b32_e32 v1, s7 +; GFX942-NEXT: v_mov_b32_e32 v2, s8 +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: v3i32_preload_arg: ; GFX90a: ; %bb.1: @@ -470,22 +470,22 @@ define amdgpu_kernel void @v3i32_preload_arg(ptr addrspace(1) nocapture inreg %o } define amdgpu_kernel void @v3f32_preload_arg(ptr addrspace(1) nocapture inreg %out, <3 x float> inreg %in) #0 { -; GFX940-LABEL: v3f32_preload_arg: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 -; GFX940-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x18 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB12_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB12_0: -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, s6 -; GFX940-NEXT: v_mov_b32_e32 v1, s7 -; GFX940-NEXT: v_mov_b32_e32 v2, s8 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: v3f32_preload_arg: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 +; GFX942-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x18 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB12_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB12_0: +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, s6 +; GFX942-NEXT: v_mov_b32_e32 v1, s7 +; GFX942-NEXT: v_mov_b32_e32 v2, s8 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: v3f32_preload_arg: ; GFX90a: ; %bb.1: @@ -508,28 +508,28 @@ define amdgpu_kernel void @v3f32_preload_arg(ptr addrspace(1) nocapture inreg %o } define amdgpu_kernel void @v5i8_preload_arg(ptr addrspace(1) nocapture inreg %out, <5 x i8> inreg %in) #0 { -; GFX940-LABEL: v5i8_preload_arg: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x8 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB13_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB13_0: -; GFX940-NEXT: s_lshr_b32 s1, s4, 24 -; GFX940-NEXT: s_and_b32 s0, s4, 0xffff -; GFX940-NEXT: s_lshl_b32 s1, s1, 8 -; GFX940-NEXT: s_bfe_u32 s4, s4, 0x80010 -; GFX940-NEXT: s_or_b32 s1, s4, s1 -; GFX940-NEXT: s_lshl_b32 s1, s1, 16 -; GFX940-NEXT: s_or_b32 s0, s0, s1 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, s5 -; GFX940-NEXT: global_store_byte v0, v1, s[2:3] offset:4 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: v5i8_preload_arg: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x8 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB13_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB13_0: +; GFX942-NEXT: s_lshr_b32 s1, s4, 24 +; GFX942-NEXT: s_and_b32 s0, s4, 0xffff +; GFX942-NEXT: s_lshl_b32 s1, s1, 8 +; GFX942-NEXT: s_bfe_u32 s4, s4, 0x80010 +; GFX942-NEXT: s_or_b32 s1, s4, s1 +; GFX942-NEXT: s_lshl_b32 s1, s1, 16 +; GFX942-NEXT: s_or_b32 s0, s0, s1 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, s5 +; GFX942-NEXT: global_store_byte v0, v1, s[2:3] offset:4 +; GFX942-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NEXT: global_store_dword v0, v1, s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: v5i8_preload_arg: ; GFX90a: ; %bb.1: @@ -558,32 +558,32 @@ define amdgpu_kernel void @v5i8_preload_arg(ptr addrspace(1) nocapture inreg %ou } define amdgpu_kernel void @v5f64_arg(ptr addrspace(1) nocapture inreg %out, <5 x double> inreg %in) #0 { -; GFX940-LABEL: v5f64_arg: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB14_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB14_0: -; GFX940-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x60 -; GFX940-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x40 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[12:13] -; GFX940-NEXT: v_mov_b32_e32 v0, s8 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[2:3] offset:32 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v1, s9 -; GFX940-NEXT: v_mov_b32_e32 v2, s10 -; GFX940-NEXT: v_mov_b32_e32 v3, s11 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, s4 -; GFX940-NEXT: v_mov_b32_e32 v1, s5 -; GFX940-NEXT: v_mov_b32_e32 v2, s6 -; GFX940-NEXT: v_mov_b32_e32 v3, s7 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: v5f64_arg: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB14_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB14_0: +; GFX942-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x60 +; GFX942-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x40 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[12:13] +; GFX942-NEXT: v_mov_b32_e32 v0, s8 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[2:3] offset:32 +; GFX942-NEXT: v_mov_b32_e32 v1, s9 +; GFX942-NEXT: v_mov_b32_e32 v2, s10 +; GFX942-NEXT: v_mov_b32_e32 v3, s11 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v0, s4 +; GFX942-NEXT: v_mov_b32_e32 v1, s5 +; GFX942-NEXT: v_mov_b32_e32 v2, s6 +; GFX942-NEXT: v_mov_b32_e32 v3, s7 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: v5f64_arg: ; GFX90a: ; %bb.1: @@ -616,34 +616,34 @@ define amdgpu_kernel void @v5f64_arg(ptr addrspace(1) nocapture inreg %out, <5 x } define amdgpu_kernel void @v8i8_preload_arg(ptr addrspace(1) inreg %out, <8 x i8> inreg %in) #0 { -; GFX940-LABEL: v8i8_preload_arg: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x8 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB15_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB15_0: -; GFX940-NEXT: s_lshr_b32 s1, s5, 24 -; GFX940-NEXT: s_and_b32 s0, s5, 0xffff -; GFX940-NEXT: s_lshl_b32 s1, s1, 8 -; GFX940-NEXT: s_bfe_u32 s5, s5, 0x80010 -; GFX940-NEXT: s_or_b32 s1, s5, s1 -; GFX940-NEXT: s_lshl_b32 s1, s1, 16 -; GFX940-NEXT: s_lshr_b32 s5, s4, 24 -; GFX940-NEXT: s_or_b32 s0, s0, s1 -; GFX940-NEXT: s_and_b32 s1, s4, 0xffff -; GFX940-NEXT: s_lshl_b32 s5, s5, 8 -; GFX940-NEXT: s_bfe_u32 s4, s4, 0x80010 -; GFX940-NEXT: s_or_b32 s4, s4, s5 -; GFX940-NEXT: s_lshl_b32 s4, s4, 16 -; GFX940-NEXT: s_or_b32 s1, s1, s4 -; GFX940-NEXT: v_mov_b32_e32 v0, s1 -; GFX940-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: v8i8_preload_arg: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x8 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB15_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB15_0: +; GFX942-NEXT: s_lshr_b32 s1, s5, 24 +; GFX942-NEXT: s_and_b32 s0, s5, 0xffff +; GFX942-NEXT: s_lshl_b32 s1, s1, 8 +; GFX942-NEXT: s_bfe_u32 s5, s5, 0x80010 +; GFX942-NEXT: s_or_b32 s1, s5, s1 +; GFX942-NEXT: s_lshl_b32 s1, s1, 16 +; GFX942-NEXT: s_lshr_b32 s5, s4, 24 +; GFX942-NEXT: s_or_b32 s0, s0, s1 +; GFX942-NEXT: s_and_b32 s1, s4, 0xffff +; GFX942-NEXT: s_lshl_b32 s5, s5, 8 +; GFX942-NEXT: s_bfe_u32 s4, s4, 0x80010 +; GFX942-NEXT: s_or_b32 s4, s4, s5 +; GFX942-NEXT: s_lshl_b32 s4, s4, 16 +; GFX942-NEXT: s_or_b32 s1, s1, s4 +; GFX942-NEXT: v_mov_b32_e32 v0, s1 +; GFX942-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: v8i8_preload_arg: ; GFX90a: ; %bb.1: @@ -678,19 +678,19 @@ define amdgpu_kernel void @v8i8_preload_arg(ptr addrspace(1) inreg %out, <8 x i8 } define amdgpu_kernel void @i64_kernel_preload_arg(ptr addrspace(1) inreg %out, i64 inreg %a) #0 { -; GFX940-LABEL: i64_kernel_preload_arg: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x8 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB16_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB16_0: -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: i64_kernel_preload_arg: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x8 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB16_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB16_0: +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[4:5] +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: i64_kernel_preload_arg: ; GFX90a: ; %bb.1: @@ -710,19 +710,19 @@ define amdgpu_kernel void @i64_kernel_preload_arg(ptr addrspace(1) inreg %out, i } define amdgpu_kernel void @f64_kernel_preload_arg(ptr addrspace(1) inreg %out, double inreg %in) #0 { -; GFX940-LABEL: f64_kernel_preload_arg: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x8 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB17_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB17_0: -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: f64_kernel_preload_arg: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x8 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB17_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB17_0: +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[4:5] +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: f64_kernel_preload_arg: ; GFX90a: ; %bb.1: @@ -742,19 +742,19 @@ define amdgpu_kernel void @f64_kernel_preload_arg(ptr addrspace(1) inreg %out, d } define amdgpu_kernel void @half_kernel_preload_arg(ptr addrspace(1) inreg %out, half inreg %in) #0 { -; GFX940-LABEL: half_kernel_preload_arg: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dword s4, s[0:1], 0x8 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB18_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB18_0: -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-NEXT: global_store_short v0, v1, s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: half_kernel_preload_arg: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dword s4, s[0:1], 0x8 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB18_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB18_0: +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, s4 +; GFX942-NEXT: global_store_short v0, v1, s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: half_kernel_preload_arg: ; GFX90a: ; %bb.1: @@ -774,19 +774,19 @@ define amdgpu_kernel void @half_kernel_preload_arg(ptr addrspace(1) inreg %out, } define amdgpu_kernel void @bfloat_kernel_preload_arg(ptr addrspace(1) inreg %out, bfloat inreg %in) #0 { -; GFX940-LABEL: bfloat_kernel_preload_arg: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dword s4, s[0:1], 0x8 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB19_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB19_0: -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-NEXT: global_store_short v0, v1, s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: bfloat_kernel_preload_arg: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dword s4, s[0:1], 0x8 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB19_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB19_0: +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, s4 +; GFX942-NEXT: global_store_short v0, v1, s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: bfloat_kernel_preload_arg: ; GFX90a: ; %bb.1: @@ -806,19 +806,19 @@ define amdgpu_kernel void @bfloat_kernel_preload_arg(ptr addrspace(1) inreg %out } define amdgpu_kernel void @v2bfloat_kernel_preload_arg(ptr addrspace(1) inreg %out, <2 x bfloat> inreg %in) #0 { -; GFX940-LABEL: v2bfloat_kernel_preload_arg: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dword s4, s[0:1], 0x8 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB20_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB20_0: -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: v2bfloat_kernel_preload_arg: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dword s4, s[0:1], 0x8 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB20_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB20_0: +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, s4 +; GFX942-NEXT: global_store_dword v0, v1, s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: v2bfloat_kernel_preload_arg: ; GFX90a: ; %bb.1: @@ -838,21 +838,21 @@ define amdgpu_kernel void @v2bfloat_kernel_preload_arg(ptr addrspace(1) inreg %o } define amdgpu_kernel void @v3bfloat_kernel_preload_arg(ptr addrspace(1) inreg %out, <3 x bfloat> inreg %in) #0 { -; GFX940-LABEL: v3bfloat_kernel_preload_arg: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x8 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB21_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB21_0: -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, s5 -; GFX940-NEXT: global_store_short v0, v1, s[2:3] offset:4 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: v3bfloat_kernel_preload_arg: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x8 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB21_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB21_0: +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, s5 +; GFX942-NEXT: global_store_short v0, v1, s[2:3] offset:4 +; GFX942-NEXT: v_mov_b32_e32 v1, s4 +; GFX942-NEXT: global_store_dword v0, v1, s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: v3bfloat_kernel_preload_arg: ; GFX90a: ; %bb.1: @@ -874,22 +874,22 @@ define amdgpu_kernel void @v3bfloat_kernel_preload_arg(ptr addrspace(1) inreg %o } define amdgpu_kernel void @v6bfloat_kernel_preload_arg(ptr addrspace(1) inreg %out, <6 x bfloat> inreg %in) #0 { -; GFX940-LABEL: v6bfloat_kernel_preload_arg: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 -; GFX940-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x18 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB22_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB22_0: -; GFX940-NEXT: v_mov_b32_e32 v0, s6 -; GFX940-NEXT: v_mov_b32_e32 v1, s7 -; GFX940-NEXT: v_mov_b32_e32 v2, s8 -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: v6bfloat_kernel_preload_arg: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 +; GFX942-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x18 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB22_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB22_0: +; GFX942-NEXT: v_mov_b32_e32 v0, s6 +; GFX942-NEXT: v_mov_b32_e32 v1, s7 +; GFX942-NEXT: v_mov_b32_e32 v2, s8 +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: v6bfloat_kernel_preload_arg: ; GFX90a: ; %bb.1: @@ -912,25 +912,25 @@ define amdgpu_kernel void @v6bfloat_kernel_preload_arg(ptr addrspace(1) inreg %o } define amdgpu_kernel void @half_v7bfloat_kernel_preload_arg(ptr addrspace(1) inreg %out, half inreg %in, <7 x bfloat> inreg %in2, ptr addrspace(1) inreg %out2) #0 { -; GFX940-LABEL: half_v7bfloat_kernel_preload_arg: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x8 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB23_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB23_0: -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, s4 -; GFX940-NEXT: global_store_short v3, v0, s[2:3] sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, s9 -; GFX940-NEXT: global_store_short v3, v0, s[10:11] offset:12 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, s8 -; GFX940-NEXT: v_mov_b32_e32 v0, s6 -; GFX940-NEXT: v_mov_b32_e32 v1, s7 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[10:11] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: half_v7bfloat_kernel_preload_arg: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x8 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB23_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB23_0: +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, s4 +; GFX942-NEXT: global_store_short v3, v0, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v0, s9 +; GFX942-NEXT: global_store_short v3, v0, s[10:11] offset:12 +; GFX942-NEXT: v_mov_b32_e32 v2, s8 +; GFX942-NEXT: v_mov_b32_e32 v0, s6 +; GFX942-NEXT: v_mov_b32_e32 v1, s7 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[10:11] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: half_v7bfloat_kernel_preload_arg: ; GFX90a: ; %bb.1: @@ -959,20 +959,20 @@ define amdgpu_kernel void @half_v7bfloat_kernel_preload_arg(ptr addrspace(1) inr } define amdgpu_kernel void @i1_kernel_preload_arg(ptr addrspace(1) inreg %out, i1 inreg %in) #0 { -; GFX940-LABEL: i1_kernel_preload_arg: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dword s4, s[0:1], 0x8 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB24_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB24_0: -; GFX940-NEXT: s_and_b32 s0, s4, 1 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NEXT: global_store_byte v0, v1, s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: i1_kernel_preload_arg: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dword s4, s[0:1], 0x8 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB24_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB24_0: +; GFX942-NEXT: s_and_b32 s0, s4, 1 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NEXT: global_store_byte v0, v1, s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: i1_kernel_preload_arg: ; GFX90a: ; %bb.1: @@ -993,23 +993,23 @@ define amdgpu_kernel void @i1_kernel_preload_arg(ptr addrspace(1) inreg %out, i1 } define amdgpu_kernel void @fp128_kernel_preload_arg(ptr addrspace(1) inreg %out, fp128 inreg %in) #0 { -; GFX940-LABEL: fp128_kernel_preload_arg: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 -; GFX940-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x18 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB25_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB25_0: -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, s6 -; GFX940-NEXT: v_mov_b32_e32 v1, s7 -; GFX940-NEXT: v_mov_b32_e32 v2, s8 -; GFX940-NEXT: v_mov_b32_e32 v3, s9 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: fp128_kernel_preload_arg: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 +; GFX942-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x18 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB25_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB25_0: +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, s6 +; GFX942-NEXT: v_mov_b32_e32 v1, s7 +; GFX942-NEXT: v_mov_b32_e32 v2, s8 +; GFX942-NEXT: v_mov_b32_e32 v3, s9 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: fp128_kernel_preload_arg: ; GFX90a: ; %bb.1: @@ -1033,29 +1033,29 @@ define amdgpu_kernel void @fp128_kernel_preload_arg(ptr addrspace(1) inreg %out, } define amdgpu_kernel void @v7i8_kernel_preload_arg(ptr addrspace(1) inreg %out, <7 x i8> inreg %in) #0 { -; GFX940-LABEL: v7i8_kernel_preload_arg: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x8 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB26_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB26_0: -; GFX940-NEXT: s_lshr_b32 s1, s4, 24 -; GFX940-NEXT: s_and_b32 s0, s4, 0xffff -; GFX940-NEXT: s_lshl_b32 s1, s1, 8 -; GFX940-NEXT: s_bfe_u32 s4, s4, 0x80010 -; GFX940-NEXT: s_or_b32 s1, s4, s1 -; GFX940-NEXT: s_lshl_b32 s1, s1, 16 -; GFX940-NEXT: s_or_b32 s0, s0, s1 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, s5 -; GFX940-NEXT: global_store_byte_d16_hi v0, v1, s[2:3] offset:6 sc0 sc1 -; GFX940-NEXT: global_store_short v0, v1, s[2:3] offset:4 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: v7i8_kernel_preload_arg: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x8 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB26_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB26_0: +; GFX942-NEXT: s_lshr_b32 s1, s4, 24 +; GFX942-NEXT: s_and_b32 s0, s4, 0xffff +; GFX942-NEXT: s_lshl_b32 s1, s1, 8 +; GFX942-NEXT: s_bfe_u32 s4, s4, 0x80010 +; GFX942-NEXT: s_or_b32 s1, s4, s1 +; GFX942-NEXT: s_lshl_b32 s1, s1, 16 +; GFX942-NEXT: s_or_b32 s0, s0, s1 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, s5 +; GFX942-NEXT: global_store_byte_d16_hi v0, v1, s[2:3] offset:6 +; GFX942-NEXT: global_store_short v0, v1, s[2:3] offset:4 +; GFX942-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NEXT: global_store_dword v0, v1, s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: v7i8_kernel_preload_arg: ; GFX90a: ; %bb.1: @@ -1085,24 +1085,24 @@ define amdgpu_kernel void @v7i8_kernel_preload_arg(ptr addrspace(1) inreg %out, } define amdgpu_kernel void @v7half_kernel_preload_arg(ptr addrspace(1) inreg %out, <7 x half> inreg %in) #0 { -; GFX940-LABEL: v7half_kernel_preload_arg: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 -; GFX940-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x18 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB27_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB27_0: -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, s9 -; GFX940-NEXT: global_store_short v3, v0, s[2:3] offset:12 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, s8 -; GFX940-NEXT: v_mov_b32_e32 v0, s6 -; GFX940-NEXT: v_mov_b32_e32 v1, s7 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: v7half_kernel_preload_arg: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 +; GFX942-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x18 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB27_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB27_0: +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, s9 +; GFX942-NEXT: global_store_short v3, v0, s[2:3] offset:12 +; GFX942-NEXT: v_mov_b32_e32 v2, s8 +; GFX942-NEXT: v_mov_b32_e32 v0, s6 +; GFX942-NEXT: v_mov_b32_e32 v1, s7 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: v7half_kernel_preload_arg: ; GFX90a: ; %bb.1: @@ -1127,21 +1127,21 @@ define amdgpu_kernel void @v7half_kernel_preload_arg(ptr addrspace(1) inreg %out } define amdgpu_kernel void @i16_i32_kernel_preload_arg(ptr addrspace(1) inreg %out, i16 inreg %in, i32 inreg %in2, ptr addrspace(1) inreg %out2) #0 { -; GFX940-LABEL: i16_i32_kernel_preload_arg: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB28_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB28_0: -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-NEXT: global_store_short v0, v1, s[2:3] sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v1, s5 -; GFX940-NEXT: global_store_dword v0, v1, s[6:7] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: i16_i32_kernel_preload_arg: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB28_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB28_0: +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, s4 +; GFX942-NEXT: global_store_short v0, v1, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v1, s5 +; GFX942-NEXT: global_store_dword v0, v1, s[6:7] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: i16_i32_kernel_preload_arg: ; GFX90a: ; %bb.1: @@ -1164,23 +1164,23 @@ define amdgpu_kernel void @i16_i32_kernel_preload_arg(ptr addrspace(1) inreg %ou } define amdgpu_kernel void @i16_v3i32_kernel_preload_arg(ptr addrspace(1) inreg %out, i16 inreg %in, <3 x i32> inreg %in2, ptr addrspace(1) inreg %out2) #0 { -; GFX940-LABEL: i16_v3i32_kernel_preload_arg: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x8 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB29_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB29_0: -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, s4 -; GFX940-NEXT: v_mov_b32_e32 v0, s6 -; GFX940-NEXT: v_mov_b32_e32 v1, s7 -; GFX940-NEXT: v_mov_b32_e32 v2, s8 -; GFX940-NEXT: global_store_short v3, v4, s[2:3] sc0 sc1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[10:11] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: i16_v3i32_kernel_preload_arg: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x8 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB29_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB29_0: +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, s4 +; GFX942-NEXT: v_mov_b32_e32 v0, s6 +; GFX942-NEXT: v_mov_b32_e32 v1, s7 +; GFX942-NEXT: v_mov_b32_e32 v2, s8 +; GFX942-NEXT: global_store_short v3, v4, s[2:3] +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[10:11] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: i16_v3i32_kernel_preload_arg: ; GFX90a: ; %bb.1: @@ -1207,20 +1207,20 @@ define amdgpu_kernel void @i16_v3i32_kernel_preload_arg(ptr addrspace(1) inreg % } define amdgpu_kernel void @i16_i16_kernel_preload_arg(ptr addrspace(1) inreg %out, i16 inreg %in, i16 inreg %in2, ptr addrspace(1) inreg %out2) #0 { -; GFX940-LABEL: i16_i16_kernel_preload_arg: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB30_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB30_0: -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-NEXT: global_store_short v0, v1, s[2:3] sc0 sc1 -; GFX940-NEXT: global_store_short_d16_hi v0, v1, s[6:7] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: i16_i16_kernel_preload_arg: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB30_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB30_0: +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, s4 +; GFX942-NEXT: global_store_short v0, v1, s[2:3] +; GFX942-NEXT: global_store_short_d16_hi v0, v1, s[6:7] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: i16_i16_kernel_preload_arg: ; GFX90a: ; %bb.1: @@ -1242,25 +1242,25 @@ define amdgpu_kernel void @i16_i16_kernel_preload_arg(ptr addrspace(1) inreg %ou } define amdgpu_kernel void @i16_v2i8_kernel_preload_arg(ptr addrspace(1) inreg %out, i16 inreg %in, <2 x i8> inreg %in2, ptr addrspace(1) inreg %out2) #0 { -; GFX940-LABEL: i16_v2i8_kernel_preload_arg: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB31_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB31_0: -; GFX940-NEXT: s_lshr_b32 s0, s4, 24 -; GFX940-NEXT: s_lshl_b32 s0, s0, 8 -; GFX940-NEXT: s_bfe_u32 s1, s4, 0x80010 -; GFX940-NEXT: s_or_b32 s0, s1, s0 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-NEXT: global_store_short v0, v1, s[2:3] sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NEXT: global_store_short v0, v1, s[6:7] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: i16_v2i8_kernel_preload_arg: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB31_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB31_0: +; GFX942-NEXT: s_lshr_b32 s0, s4, 24 +; GFX942-NEXT: s_lshl_b32 s0, s0, 8 +; GFX942-NEXT: s_bfe_u32 s1, s4, 0x80010 +; GFX942-NEXT: s_or_b32 s0, s1, s0 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, s4 +; GFX942-NEXT: global_store_short v0, v1, s[2:3] +; GFX942-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NEXT: global_store_short v0, v1, s[6:7] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: i16_v2i8_kernel_preload_arg: ; GFX90a: ; %bb.1: @@ -1289,22 +1289,22 @@ define amdgpu_kernel void @i16_v2i8_kernel_preload_arg(ptr addrspace(1) inreg %o ; The second argument is not expected to be preloaded with the current behavior. define amdgpu_kernel void @i32_ptr1_i32_staggered_preload_arg(i32 inreg %arg0, ptr addrspace(1) %out, i32 inreg %arg1) #0 { -; GFX940-LABEL: i32_ptr1_i32_staggered_preload_arg: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dword s2, s[0:1], 0x0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB32_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB32_0: -; GFX940-NEXT: s_load_dword s3, s[0:1], 0x10 -; GFX940-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x8 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_add_i32 s0, s2, s3 -; GFX940-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NEXT: global_store_dword v0, v1, s[4:5] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: i32_ptr1_i32_staggered_preload_arg: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB32_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB32_0: +; GFX942-NEXT: s_load_dword s3, s[0:1], 0x10 +; GFX942-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x8 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_add_i32 s0, s2, s3 +; GFX942-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NEXT: global_store_dword v0, v1, s[4:5] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: i32_ptr1_i32_staggered_preload_arg: ; GFX90a: ; %bb.1: @@ -1328,20 +1328,20 @@ define amdgpu_kernel void @i32_ptr1_i32_staggered_preload_arg(i32 inreg %arg0, p } define amdgpu_kernel void @ptr1_i8_trailing_unused(ptr addrspace(1) inreg %out, i8 inreg %arg0, i32 inreg %unused) #0 { -; GFX940-LABEL: ptr1_i8_trailing_unused: -; GFX940: ; %bb.1: -; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 -; GFX940-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x8 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_branch .LBB33_0 -; GFX940-NEXT: .p2align 8 -; GFX940-NEXT: ; %bb.2: -; GFX940-NEXT: .LBB33_0: -; GFX940-NEXT: s_and_b32 s0, s4, 0xff -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: ptr1_i8_trailing_unused: +; GFX942: ; %bb.1: +; GFX942-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX942-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x8 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_branch .LBB33_0 +; GFX942-NEXT: .p2align 8 +; GFX942-NEXT: ; %bb.2: +; GFX942-NEXT: .LBB33_0: +; GFX942-NEXT: s_and_b32 s0, s4, 0xff +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, s0 +; GFX942-NEXT: global_store_dword v0, v1, s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX90a-LABEL: ptr1_i8_trailing_unused: ; GFX90a: ; %bb.1: diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2bf16.v2bf16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2bf16.v2bf16.ll index d76945c49d71d..282be2ff22500 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2bf16.v2bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2bf16.v2bf16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v2bf16_v2bf16__u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v2bf16_v2bf16__0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v2bf16__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -79,17 +79,17 @@ define void @v_shuffle_v2bf16_v2bf16__1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v2bf16__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v1, s0, v1, 16 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -132,17 +132,17 @@ define void @v_shuffle_v2bf16_v2bf16__3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v2bf16__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v1, s0, v1, 16 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <2 x i32> @@ -181,21 +181,21 @@ define void @v_shuffle_v2bf16_v2bf16__3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v2, 16 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v2bf16__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v2, 16 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <2 x i32> @@ -236,22 +236,22 @@ define void @v_shuffle_v2bf16_v2bf16__3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v2, s2 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v2bf16__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v2, s2 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <2 x i32> @@ -284,17 +284,17 @@ define void @v_shuffle_v2bf16_v2bf16__3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v2bf16__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v1, 16 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <2 x i32> @@ -329,18 +329,18 @@ define void @v_shuffle_v2bf16_v2bf16__3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v2bf16__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <2 x i32> @@ -373,17 +373,17 @@ define void @v_shuffle_v2bf16_v2bf16__u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v2bf16__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -417,18 +417,18 @@ define void @v_shuffle_v2bf16_v2bf16__0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v2bf16__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> zeroinitializer store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -460,17 +460,17 @@ define void @v_shuffle_v2bf16_v2bf16__1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v2bf16__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v1, 16 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -502,17 +502,17 @@ define void @v_shuffle_v2bf16_v2bf16__2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v2bf16__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -542,16 +542,16 @@ define void @v_shuffle_v2bf16_v2bf16__u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v2bf16__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -581,16 +581,16 @@ define void @v_shuffle_v2bf16_v2bf16__0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v2bf16__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -624,18 +624,18 @@ define void @v_shuffle_v2bf16_v2bf16__1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v2bf16__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -665,16 +665,16 @@ define void @v_shuffle_v2bf16_v2bf16__2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v2bf16__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -715,16 +715,16 @@ define void @v_shuffle_v2bf16_v2bf16__0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v2bf16__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -756,17 +756,17 @@ define void @v_shuffle_v2bf16_v2bf16__1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v2bf16__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v1, s0, v1, 16 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -807,16 +807,16 @@ define void @v_shuffle_v2bf16_v2bf16__u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v2bf16__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <2 x i32> @@ -857,22 +857,22 @@ define void @v_shuffle_v2bf16_v2bf16__0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v2 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v2bf16__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v1, v2 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <2 x i32> @@ -913,22 +913,22 @@ define void @v_shuffle_v2bf16_v2bf16__1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v1, s2 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v2bf16__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v1, s2 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <2 x i32> @@ -959,16 +959,16 @@ define void @v_shuffle_v2bf16_v2bf16__2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v2bf16__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v2bf16__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <2 x i32> @@ -1013,17 +1013,17 @@ define void @s_shuffle_v2bf16_v2bf16__0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v2bf16__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -1055,17 +1055,17 @@ define void @s_shuffle_v2bf16_v2bf16__1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v2bf16__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -1111,17 +1111,17 @@ define void @s_shuffle_v2bf16_v2bf16__3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v2bf16__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <2 x i32> @@ -1162,21 +1162,21 @@ define void @s_shuffle_v2bf16_v2bf16__3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v2bf16__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <2 x i32> @@ -1219,22 +1219,22 @@ define void @s_shuffle_v2bf16_v2bf16__3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v2bf16__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <2 x i32> @@ -1269,18 +1269,18 @@ define void @s_shuffle_v2bf16_v2bf16__3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v2bf16__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <2 x i32> @@ -1315,18 +1315,18 @@ define void @s_shuffle_v2bf16_v2bf16__3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v2bf16__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <2 x i32> @@ -1359,17 +1359,17 @@ define void @s_shuffle_v2bf16_v2bf16__u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v2bf16__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -1401,17 +1401,17 @@ define void @s_shuffle_v2bf16_v2bf16__0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v2bf16__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> zeroinitializer call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -1445,18 +1445,18 @@ define void @s_shuffle_v2bf16_v2bf16__1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v2bf16__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -1488,17 +1488,17 @@ define void @s_shuffle_v2bf16_v2bf16__2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v2bf16__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -1528,17 +1528,17 @@ define void @s_shuffle_v2bf16_v2bf16__u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v2bf16__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -1568,17 +1568,17 @@ define void @s_shuffle_v2bf16_v2bf16__0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v2bf16__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -1612,18 +1612,18 @@ define void @s_shuffle_v2bf16_v2bf16__1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v2bf16__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -1653,17 +1653,17 @@ define void @s_shuffle_v2bf16_v2bf16__2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v2bf16__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -1707,17 +1707,17 @@ define void @s_shuffle_v2bf16_v2bf16__0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v2bf16__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -1749,17 +1749,17 @@ define void @s_shuffle_v2bf16_v2bf16__1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v2bf16__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -1803,17 +1803,17 @@ define void @s_shuffle_v2bf16_v2bf16__u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v2bf16__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <2 x i32> @@ -1854,21 +1854,21 @@ define void @s_shuffle_v2bf16_v2bf16__0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v2bf16__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <2 x i32> @@ -1911,22 +1911,22 @@ define void @s_shuffle_v2bf16_v2bf16__1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v2bf16__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <2 x i32> @@ -1957,17 +1957,17 @@ define void @s_shuffle_v2bf16_v2bf16__2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v2bf16__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v2bf16__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <2 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2bf16.v3bf16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2bf16.v3bf16.ll index 6181a46d02943..75a350db74de0 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2bf16.v3bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2bf16.v3bf16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v2bf16_v3bf16__u_u(ptr addrspace(1) inreg %ptr) { @@ -39,16 +39,16 @@ define void @v_shuffle_v2bf16_v3bf16__0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -81,17 +81,17 @@ define void @v_shuffle_v2bf16_v3bf16__1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -122,16 +122,16 @@ define void @v_shuffle_v2bf16_v3bf16__2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -176,17 +176,17 @@ define void @v_shuffle_v2bf16_v3bf16__4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -219,16 +219,16 @@ define void @v_shuffle_v2bf16_v3bf16__5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -271,22 +271,22 @@ define void @v_shuffle_v2bf16_v3bf16__5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -329,22 +329,22 @@ define void @v_shuffle_v2bf16_v3bf16__5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -387,22 +387,22 @@ define void @v_shuffle_v2bf16_v3bf16__5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -439,18 +439,18 @@ define void @v_shuffle_v2bf16_v3bf16__5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -487,18 +487,18 @@ define void @v_shuffle_v2bf16_v3bf16__5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v0 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -535,18 +535,18 @@ define void @v_shuffle_v2bf16_v3bf16__5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -581,17 +581,17 @@ define void @v_shuffle_v2bf16_v3bf16__u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -626,18 +626,18 @@ define void @v_shuffle_v2bf16_v3bf16__0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> zeroinitializer @@ -670,17 +670,17 @@ define void @v_shuffle_v2bf16_v3bf16__1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -715,18 +715,18 @@ define void @v_shuffle_v2bf16_v3bf16__2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -759,17 +759,17 @@ define void @v_shuffle_v2bf16_v3bf16__3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -808,21 +808,21 @@ define void @v_shuffle_v2bf16_v3bf16__4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -855,16 +855,16 @@ define void @v_shuffle_v2bf16_v3bf16__u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -895,16 +895,16 @@ define void @v_shuffle_v2bf16_v3bf16__0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -939,18 +939,18 @@ define void @v_shuffle_v2bf16_v3bf16__1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -985,18 +985,18 @@ define void @v_shuffle_v2bf16_v3bf16__2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v0 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -1027,16 +1027,16 @@ define void @v_shuffle_v2bf16_v3bf16__3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -1077,22 +1077,22 @@ define void @v_shuffle_v2bf16_v3bf16__4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -1127,17 +1127,17 @@ define void @v_shuffle_v2bf16_v3bf16__u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -1172,18 +1172,18 @@ define void @v_shuffle_v2bf16_v3bf16__0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -1216,17 +1216,17 @@ define void @v_shuffle_v2bf16_v3bf16__1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -1261,18 +1261,18 @@ define void @v_shuffle_v2bf16_v3bf16__2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -1305,17 +1305,17 @@ define void @v_shuffle_v2bf16_v3bf16__3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -1354,21 +1354,21 @@ define void @v_shuffle_v2bf16_v3bf16__4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -1413,16 +1413,16 @@ define void @v_shuffle_v2bf16_v3bf16__0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -1455,17 +1455,17 @@ define void @v_shuffle_v2bf16_v3bf16__1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -1496,16 +1496,16 @@ define void @v_shuffle_v2bf16_v3bf16__2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -1550,17 +1550,17 @@ define void @v_shuffle_v2bf16_v3bf16__4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -1593,16 +1593,16 @@ define void @v_shuffle_v2bf16_v3bf16__u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -1645,22 +1645,22 @@ define void @v_shuffle_v2bf16_v3bf16__0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -1703,22 +1703,22 @@ define void @v_shuffle_v2bf16_v3bf16__1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -1761,22 +1761,22 @@ define void @v_shuffle_v2bf16_v3bf16__2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -1809,16 +1809,16 @@ define void @v_shuffle_v2bf16_v3bf16__3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -1855,18 +1855,18 @@ define void @v_shuffle_v2bf16_v3bf16__4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -1901,17 +1901,17 @@ define void @v_shuffle_v2bf16_v3bf16__u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -1954,22 +1954,22 @@ define void @v_shuffle_v2bf16_v3bf16__0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -2010,21 +2010,21 @@ define void @v_shuffle_v2bf16_v3bf16__1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -2067,22 +2067,22 @@ define void @v_shuffle_v2bf16_v3bf16__2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -2119,18 +2119,18 @@ define void @v_shuffle_v2bf16_v3bf16__3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -2165,17 +2165,17 @@ define void @v_shuffle_v2bf16_v3bf16__4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v3bf16__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v3bf16__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -2223,17 +2223,17 @@ define void @s_shuffle_v2bf16_v3bf16__0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -2266,17 +2266,17 @@ define void @s_shuffle_v2bf16_v3bf16__1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -2309,17 +2309,17 @@ define void @s_shuffle_v2bf16_v3bf16__2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -2367,17 +2367,17 @@ define void @s_shuffle_v2bf16_v3bf16__4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -2412,17 +2412,17 @@ define void @s_shuffle_v2bf16_v3bf16__5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -2463,20 +2463,20 @@ define void @s_shuffle_v2bf16_v3bf16__5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -2519,21 +2519,21 @@ define void @s_shuffle_v2bf16_v3bf16__5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -2574,20 +2574,20 @@ define void @s_shuffle_v2bf16_v3bf16__5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -2622,17 +2622,17 @@ define void @s_shuffle_v2bf16_v3bf16__5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -2669,18 +2669,18 @@ define void @s_shuffle_v2bf16_v3bf16__5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -2715,17 +2715,17 @@ define void @s_shuffle_v2bf16_v3bf16__5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -2760,17 +2760,17 @@ define void @s_shuffle_v2bf16_v3bf16__u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -2803,17 +2803,17 @@ define void @s_shuffle_v2bf16_v3bf16__0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> zeroinitializer @@ -2848,18 +2848,18 @@ define void @s_shuffle_v2bf16_v3bf16__1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -2892,17 +2892,17 @@ define void @s_shuffle_v2bf16_v3bf16__2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -2935,17 +2935,17 @@ define void @s_shuffle_v2bf16_v3bf16__3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -2986,21 +2986,21 @@ define void @s_shuffle_v2bf16_v3bf16__4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -3033,17 +3033,17 @@ define void @s_shuffle_v2bf16_v3bf16__u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -3074,17 +3074,17 @@ define void @s_shuffle_v2bf16_v3bf16__0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -3119,18 +3119,18 @@ define void @s_shuffle_v2bf16_v3bf16__1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -3165,18 +3165,18 @@ define void @s_shuffle_v2bf16_v3bf16__2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -3207,17 +3207,17 @@ define void @s_shuffle_v2bf16_v3bf16__3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -3260,22 +3260,22 @@ define void @s_shuffle_v2bf16_v3bf16__4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -3310,17 +3310,17 @@ define void @s_shuffle_v2bf16_v3bf16__u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -3353,17 +3353,17 @@ define void @s_shuffle_v2bf16_v3bf16__0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -3398,18 +3398,18 @@ define void @s_shuffle_v2bf16_v3bf16__1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -3442,17 +3442,17 @@ define void @s_shuffle_v2bf16_v3bf16__2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -3485,17 +3485,17 @@ define void @s_shuffle_v2bf16_v3bf16__3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -3536,21 +3536,21 @@ define void @s_shuffle_v2bf16_v3bf16__4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -3598,17 +3598,17 @@ define void @s_shuffle_v2bf16_v3bf16__0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -3641,17 +3641,17 @@ define void @s_shuffle_v2bf16_v3bf16__1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -3684,17 +3684,17 @@ define void @s_shuffle_v2bf16_v3bf16__2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <2 x i32> @@ -3744,18 +3744,18 @@ define void @s_shuffle_v2bf16_v3bf16__4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -3788,17 +3788,17 @@ define void @s_shuffle_v2bf16_v3bf16__u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -3841,21 +3841,21 @@ define void @s_shuffle_v2bf16_v3bf16__0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -3900,22 +3900,22 @@ define void @s_shuffle_v2bf16_v3bf16__1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -3958,21 +3958,21 @@ define void @s_shuffle_v2bf16_v3bf16__2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4005,17 +4005,17 @@ define void @s_shuffle_v2bf16_v3bf16__3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4052,18 +4052,18 @@ define void @s_shuffle_v2bf16_v3bf16__4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4098,17 +4098,17 @@ define void @s_shuffle_v2bf16_v3bf16__u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4149,20 +4149,20 @@ define void @s_shuffle_v2bf16_v3bf16__0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4205,21 +4205,21 @@ define void @s_shuffle_v2bf16_v3bf16__1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4260,20 +4260,20 @@ define void @s_shuffle_v2bf16_v3bf16__2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4308,17 +4308,17 @@ define void @s_shuffle_v2bf16_v3bf16__3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4355,18 +4355,18 @@ define void @s_shuffle_v2bf16_v3bf16__4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v3bf16__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v3bf16__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2bf16.v4bf16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2bf16.v4bf16.ll index 25ea06b5daacd..d2e11097c1d6d 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2bf16.v4bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2bf16.v4bf16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v2bf16_v4bf16__u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v2bf16_v4bf16__0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -79,17 +79,17 @@ define void @v_shuffle_v2bf16_v4bf16__1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -119,16 +119,16 @@ define void @v_shuffle_v2bf16_v4bf16__2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -160,17 +160,17 @@ define void @v_shuffle_v2bf16_v4bf16__3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -213,17 +213,17 @@ define void @v_shuffle_v2bf16_v4bf16__5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -254,16 +254,16 @@ define void @v_shuffle_v2bf16_v4bf16__6_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -296,17 +296,17 @@ define void @v_shuffle_v2bf16_v4bf16__7_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -345,21 +345,21 @@ define void @v_shuffle_v2bf16_v4bf16__7_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -400,22 +400,22 @@ define void @v_shuffle_v2bf16_v4bf16__7_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -454,21 +454,21 @@ define void @v_shuffle_v2bf16_v4bf16__7_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -509,22 +509,22 @@ define void @v_shuffle_v2bf16_v4bf16__7_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -557,17 +557,17 @@ define void @v_shuffle_v2bf16_v4bf16__7_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v1, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -602,18 +602,18 @@ define void @v_shuffle_v2bf16_v4bf16__7_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -646,17 +646,17 @@ define void @v_shuffle_v2bf16_v4bf16__7_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v1, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -691,18 +691,18 @@ define void @v_shuffle_v2bf16_v4bf16__7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -735,17 +735,17 @@ define void @v_shuffle_v2bf16_v4bf16__u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -779,18 +779,18 @@ define void @v_shuffle_v2bf16_v4bf16__0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> zeroinitializer store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -822,17 +822,17 @@ define void @v_shuffle_v2bf16_v4bf16__1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -866,18 +866,18 @@ define void @v_shuffle_v2bf16_v4bf16__2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -909,17 +909,17 @@ define void @v_shuffle_v2bf16_v4bf16__3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v1, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -951,17 +951,17 @@ define void @v_shuffle_v2bf16_v4bf16__4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -999,21 +999,21 @@ define void @v_shuffle_v2bf16_v4bf16__5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -1054,22 +1054,22 @@ define void @v_shuffle_v2bf16_v4bf16__6_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -1100,16 +1100,16 @@ define void @v_shuffle_v2bf16_v4bf16__u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1139,16 +1139,16 @@ define void @v_shuffle_v2bf16_v4bf16__0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1182,18 +1182,18 @@ define void @v_shuffle_v2bf16_v4bf16__1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1227,18 +1227,18 @@ define void @v_shuffle_v2bf16_v4bf16__2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v0 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1272,18 +1272,18 @@ define void @v_shuffle_v2bf16_v4bf16__3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1313,16 +1313,16 @@ define void @v_shuffle_v2bf16_v4bf16__4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1362,22 +1362,22 @@ define void @v_shuffle_v2bf16_v4bf16__5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -1418,22 +1418,22 @@ define void @v_shuffle_v2bf16_v4bf16__6_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -1466,17 +1466,17 @@ define void @v_shuffle_v2bf16_v4bf16__u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1510,18 +1510,18 @@ define void @v_shuffle_v2bf16_v4bf16__0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1553,17 +1553,17 @@ define void @v_shuffle_v2bf16_v4bf16__1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1597,18 +1597,18 @@ define void @v_shuffle_v2bf16_v4bf16__2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1640,17 +1640,17 @@ define void @v_shuffle_v2bf16_v4bf16__3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v1, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1682,17 +1682,17 @@ define void @v_shuffle_v2bf16_v4bf16__4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1730,21 +1730,21 @@ define void @v_shuffle_v2bf16_v4bf16__5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -1785,22 +1785,22 @@ define void @v_shuffle_v2bf16_v4bf16__6_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -1831,16 +1831,16 @@ define void @v_shuffle_v2bf16_v4bf16__u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1874,18 +1874,18 @@ define void @v_shuffle_v2bf16_v4bf16__0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1919,18 +1919,18 @@ define void @v_shuffle_v2bf16_v4bf16__1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1960,16 +1960,16 @@ define void @v_shuffle_v2bf16_v4bf16__2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2003,18 +2003,18 @@ define void @v_shuffle_v2bf16_v4bf16__3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2044,16 +2044,16 @@ define void @v_shuffle_v2bf16_v4bf16__4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2093,22 +2093,22 @@ define void @v_shuffle_v2bf16_v4bf16__5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -2149,22 +2149,22 @@ define void @v_shuffle_v2bf16_v4bf16__6_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v1 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -2206,16 +2206,16 @@ define void @v_shuffle_v2bf16_v4bf16__0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2247,17 +2247,17 @@ define void @v_shuffle_v2bf16_v4bf16__1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2287,16 +2287,16 @@ define void @v_shuffle_v2bf16_v4bf16__2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2328,17 +2328,17 @@ define void @v_shuffle_v2bf16_v4bf16__3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2381,17 +2381,17 @@ define void @v_shuffle_v2bf16_v4bf16__5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -2426,18 +2426,18 @@ define void @v_shuffle_v2bf16_v4bf16__6_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -2468,16 +2468,16 @@ define void @v_shuffle_v2bf16_v4bf16__u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -2518,22 +2518,22 @@ define void @v_shuffle_v2bf16_v4bf16__0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -2574,22 +2574,22 @@ define void @v_shuffle_v2bf16_v4bf16__1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -2630,22 +2630,22 @@ define void @v_shuffle_v2bf16_v4bf16__2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -2686,22 +2686,22 @@ define void @v_shuffle_v2bf16_v4bf16__3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -2732,16 +2732,16 @@ define void @v_shuffle_v2bf16_v4bf16__4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -2776,18 +2776,18 @@ define void @v_shuffle_v2bf16_v4bf16__5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -2822,18 +2822,18 @@ define void @v_shuffle_v2bf16_v4bf16__6_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v0 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -2866,17 +2866,17 @@ define void @v_shuffle_v2bf16_v4bf16__u_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -2917,22 +2917,22 @@ define void @v_shuffle_v2bf16_v4bf16__0_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -2971,21 +2971,21 @@ define void @v_shuffle_v2bf16_v4bf16__1_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -3026,22 +3026,22 @@ define void @v_shuffle_v2bf16_v4bf16__2_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -3080,21 +3080,21 @@ define void @v_shuffle_v2bf16_v4bf16__3_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v1, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v1, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -3129,18 +3129,18 @@ define void @v_shuffle_v2bf16_v4bf16__4_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -3173,17 +3173,17 @@ define void @v_shuffle_v2bf16_v4bf16__5_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -3218,18 +3218,18 @@ define void @v_shuffle_v2bf16_v4bf16__6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -3260,16 +3260,16 @@ define void @v_shuffle_v2bf16_v4bf16__u_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -3310,22 +3310,22 @@ define void @v_shuffle_v2bf16_v4bf16__0_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -3366,22 +3366,22 @@ define void @v_shuffle_v2bf16_v4bf16__1_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -3422,22 +3422,22 @@ define void @v_shuffle_v2bf16_v4bf16__2_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -3478,22 +3478,22 @@ define void @v_shuffle_v2bf16_v4bf16__3_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -3528,18 +3528,18 @@ define void @v_shuffle_v2bf16_v4bf16__4_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -3574,18 +3574,18 @@ define void @v_shuffle_v2bf16_v4bf16__5_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -3616,16 +3616,16 @@ define void @v_shuffle_v2bf16_v4bf16__6_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v4bf16__6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -3670,17 +3670,17 @@ define void @s_shuffle_v2bf16_v4bf16__0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -3712,17 +3712,17 @@ define void @s_shuffle_v2bf16_v4bf16__1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -3754,17 +3754,17 @@ define void @s_shuffle_v2bf16_v4bf16__2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -3796,17 +3796,17 @@ define void @s_shuffle_v2bf16_v4bf16__3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -3852,17 +3852,17 @@ define void @s_shuffle_v2bf16_v4bf16__5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -3895,17 +3895,17 @@ define void @s_shuffle_v2bf16_v4bf16__6_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -3938,17 +3938,17 @@ define void @s_shuffle_v2bf16_v4bf16__7_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -3989,21 +3989,21 @@ define void @s_shuffle_v2bf16_v4bf16__7_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -4046,22 +4046,22 @@ define void @s_shuffle_v2bf16_v4bf16__7_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -4102,21 +4102,21 @@ define void @s_shuffle_v2bf16_v4bf16__7_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -4159,22 +4159,22 @@ define void @s_shuffle_v2bf16_v4bf16__7_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -4209,18 +4209,18 @@ define void @s_shuffle_v2bf16_v4bf16__7_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -4257,19 +4257,19 @@ define void @s_shuffle_v2bf16_v4bf16__7_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -4304,18 +4304,18 @@ define void @s_shuffle_v2bf16_v4bf16__7_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -4350,18 +4350,18 @@ define void @s_shuffle_v2bf16_v4bf16__7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -4394,17 +4394,17 @@ define void @s_shuffle_v2bf16_v4bf16__u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -4436,17 +4436,17 @@ define void @s_shuffle_v2bf16_v4bf16__0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> zeroinitializer call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -4480,18 +4480,18 @@ define void @s_shuffle_v2bf16_v4bf16__1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -4523,17 +4523,17 @@ define void @s_shuffle_v2bf16_v4bf16__2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -4567,18 +4567,18 @@ define void @s_shuffle_v2bf16_v4bf16__3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -4610,17 +4610,17 @@ define void @s_shuffle_v2bf16_v4bf16__4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -4660,21 +4660,21 @@ define void @s_shuffle_v2bf16_v4bf16__5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -4713,20 +4713,20 @@ define void @s_shuffle_v2bf16_v4bf16__6_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -4757,17 +4757,17 @@ define void @s_shuffle_v2bf16_v4bf16__u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -4797,17 +4797,17 @@ define void @s_shuffle_v2bf16_v4bf16__0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -4841,18 +4841,18 @@ define void @s_shuffle_v2bf16_v4bf16__1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -4886,18 +4886,18 @@ define void @s_shuffle_v2bf16_v4bf16__2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -4933,19 +4933,19 @@ define void @s_shuffle_v2bf16_v4bf16__3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -4975,17 +4975,17 @@ define void @s_shuffle_v2bf16_v4bf16__4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -5027,22 +5027,22 @@ define void @s_shuffle_v2bf16_v4bf16__5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -5083,21 +5083,21 @@ define void @s_shuffle_v2bf16_v4bf16__6_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -5130,17 +5130,17 @@ define void @s_shuffle_v2bf16_v4bf16__u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -5172,17 +5172,17 @@ define void @s_shuffle_v2bf16_v4bf16__0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -5216,18 +5216,18 @@ define void @s_shuffle_v2bf16_v4bf16__1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -5259,17 +5259,17 @@ define void @s_shuffle_v2bf16_v4bf16__2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -5303,18 +5303,18 @@ define void @s_shuffle_v2bf16_v4bf16__3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -5346,17 +5346,17 @@ define void @s_shuffle_v2bf16_v4bf16__4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -5396,21 +5396,21 @@ define void @s_shuffle_v2bf16_v4bf16__5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -5449,20 +5449,20 @@ define void @s_shuffle_v2bf16_v4bf16__6_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -5495,17 +5495,17 @@ define void @s_shuffle_v2bf16_v4bf16__u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -5539,18 +5539,18 @@ define void @s_shuffle_v2bf16_v4bf16__0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -5586,19 +5586,19 @@ define void @s_shuffle_v2bf16_v4bf16__1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -5630,17 +5630,17 @@ define void @s_shuffle_v2bf16_v4bf16__2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -5674,18 +5674,18 @@ define void @s_shuffle_v2bf16_v4bf16__3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -5717,17 +5717,17 @@ define void @s_shuffle_v2bf16_v4bf16__4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -5769,22 +5769,22 @@ define void @s_shuffle_v2bf16_v4bf16__5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -5825,21 +5825,21 @@ define void @s_shuffle_v2bf16_v4bf16__6_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -5884,17 +5884,17 @@ define void @s_shuffle_v2bf16_v4bf16__0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -5926,17 +5926,17 @@ define void @s_shuffle_v2bf16_v4bf16__1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -5968,17 +5968,17 @@ define void @s_shuffle_v2bf16_v4bf16__2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -6010,17 +6010,17 @@ define void @s_shuffle_v2bf16_v4bf16__3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -6068,18 +6068,18 @@ define void @s_shuffle_v2bf16_v4bf16__5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -6112,17 +6112,17 @@ define void @s_shuffle_v2bf16_v4bf16__6_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -6153,17 +6153,17 @@ define void @s_shuffle_v2bf16_v4bf16__u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -6204,21 +6204,21 @@ define void @s_shuffle_v2bf16_v4bf16__0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -6261,22 +6261,22 @@ define void @s_shuffle_v2bf16_v4bf16__1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -6317,21 +6317,21 @@ define void @s_shuffle_v2bf16_v4bf16__2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -6374,22 +6374,22 @@ define void @s_shuffle_v2bf16_v4bf16__3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -6420,17 +6420,17 @@ define void @s_shuffle_v2bf16_v4bf16__4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -6465,18 +6465,18 @@ define void @s_shuffle_v2bf16_v4bf16__5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -6511,18 +6511,18 @@ define void @s_shuffle_v2bf16_v4bf16__6_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -6555,17 +6555,17 @@ define void @s_shuffle_v2bf16_v4bf16__u_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -6604,20 +6604,20 @@ define void @s_shuffle_v2bf16_v4bf16__0_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -6658,21 +6658,21 @@ define void @s_shuffle_v2bf16_v4bf16__1_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -6711,20 +6711,20 @@ define void @s_shuffle_v2bf16_v4bf16__2_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -6765,21 +6765,21 @@ define void @s_shuffle_v2bf16_v4bf16__3_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -6812,17 +6812,17 @@ define void @s_shuffle_v2bf16_v4bf16__4_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -6857,18 +6857,18 @@ define void @s_shuffle_v2bf16_v4bf16__5_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -6901,17 +6901,17 @@ define void @s_shuffle_v2bf16_v4bf16__6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -6944,17 +6944,17 @@ define void @s_shuffle_v2bf16_v4bf16__u_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -6995,21 +6995,21 @@ define void @s_shuffle_v2bf16_v4bf16__0_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -7052,22 +7052,22 @@ define void @s_shuffle_v2bf16_v4bf16__1_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -7108,21 +7108,21 @@ define void @s_shuffle_v2bf16_v4bf16__2_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -7165,22 +7165,22 @@ define void @s_shuffle_v2bf16_v4bf16__3_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -7215,18 +7215,18 @@ define void @s_shuffle_v2bf16_v4bf16__4_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -7263,19 +7263,19 @@ define void @s_shuffle_v2bf16_v4bf16__5_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> @@ -7308,17 +7308,17 @@ define void @s_shuffle_v2bf16_v4bf16__6_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v4bf16__6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2bf16.v8bf16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2bf16.v8bf16.ll index 383acfe1f57c8..b1c0d40552b2b 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2bf16.v8bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2bf16.v8bf16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v2bf16_v8bf16__u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v2bf16_v8bf16__0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -79,17 +79,17 @@ define void @v_shuffle_v2bf16_v8bf16__1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -119,16 +119,16 @@ define void @v_shuffle_v2bf16_v8bf16__2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -160,17 +160,17 @@ define void @v_shuffle_v2bf16_v8bf16__3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -200,16 +200,16 @@ define void @v_shuffle_v2bf16_v8bf16__4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -241,17 +241,17 @@ define void @v_shuffle_v2bf16_v8bf16__5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -281,16 +281,16 @@ define void @v_shuffle_v2bf16_v8bf16__6_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v3, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -322,17 +322,17 @@ define void @v_shuffle_v2bf16_v8bf16__7_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -375,17 +375,17 @@ define void @v_shuffle_v2bf16_v8bf16__9_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__9_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__9_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -416,16 +416,16 @@ define void @v_shuffle_v2bf16_v8bf16__10_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__10_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__10_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -458,17 +458,17 @@ define void @v_shuffle_v2bf16_v8bf16__11_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__11_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__11_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -499,16 +499,16 @@ define void @v_shuffle_v2bf16_v8bf16__12_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__12_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__12_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -541,17 +541,17 @@ define void @v_shuffle_v2bf16_v8bf16__13_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__13_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__13_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -582,16 +582,16 @@ define void @v_shuffle_v2bf16_v8bf16__14_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__14_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__14_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v3, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -624,17 +624,17 @@ define void @v_shuffle_v2bf16_v8bf16__15_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__15_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__15_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -673,21 +673,21 @@ define void @v_shuffle_v2bf16_v8bf16__15_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__15_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v5, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__15_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v5, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -728,22 +728,22 @@ define void @v_shuffle_v2bf16_v8bf16__15_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__15_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v5, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__15_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v5, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -782,21 +782,21 @@ define void @v_shuffle_v2bf16_v8bf16__15_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__15_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v5, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__15_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v5, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -837,22 +837,22 @@ define void @v_shuffle_v2bf16_v8bf16__15_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__15_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v5, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__15_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v5, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -891,21 +891,21 @@ define void @v_shuffle_v2bf16_v8bf16__15_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__15_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v7, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__15_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v7, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -946,22 +946,22 @@ define void @v_shuffle_v2bf16_v8bf16__15_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__15_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v7, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__15_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v7, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -1000,21 +1000,21 @@ define void @v_shuffle_v2bf16_v8bf16__15_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__15_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v7, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__15_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v7, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -1055,22 +1055,22 @@ define void @v_shuffle_v2bf16_v8bf16__15_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__15_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v7, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__15_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v7, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -1103,17 +1103,17 @@ define void @v_shuffle_v2bf16_v8bf16__15_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__15_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__15_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -1148,18 +1148,18 @@ define void @v_shuffle_v2bf16_v8bf16__15_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__15_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__15_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -1192,17 +1192,17 @@ define void @v_shuffle_v2bf16_v8bf16__15_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__15_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__15_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -1237,18 +1237,18 @@ define void @v_shuffle_v2bf16_v8bf16__15_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__15_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__15_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -1281,17 +1281,17 @@ define void @v_shuffle_v2bf16_v8bf16__15_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__15_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__15_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -1326,18 +1326,18 @@ define void @v_shuffle_v2bf16_v8bf16__15_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__15_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__15_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -1370,17 +1370,17 @@ define void @v_shuffle_v2bf16_v8bf16__15_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__15_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__15_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -1415,18 +1415,18 @@ define void @v_shuffle_v2bf16_v8bf16__15_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__15_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__15_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -1459,17 +1459,17 @@ define void @v_shuffle_v2bf16_v8bf16__u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1503,18 +1503,18 @@ define void @v_shuffle_v2bf16_v8bf16__0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> zeroinitializer store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1546,17 +1546,17 @@ define void @v_shuffle_v2bf16_v8bf16__1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1590,18 +1590,18 @@ define void @v_shuffle_v2bf16_v8bf16__2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1633,17 +1633,17 @@ define void @v_shuffle_v2bf16_v8bf16__3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v1, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1677,18 +1677,18 @@ define void @v_shuffle_v2bf16_v8bf16__4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1720,17 +1720,17 @@ define void @v_shuffle_v2bf16_v8bf16__5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1764,18 +1764,18 @@ define void @v_shuffle_v2bf16_v8bf16__6_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1807,17 +1807,17 @@ define void @v_shuffle_v2bf16_v8bf16__7_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1849,17 +1849,17 @@ define void @v_shuffle_v2bf16_v8bf16__8_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__8_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__8_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1897,21 +1897,21 @@ define void @v_shuffle_v2bf16_v8bf16__9_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__9_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v2, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__9_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v2, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -1952,22 +1952,22 @@ define void @v_shuffle_v2bf16_v8bf16__10_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__10_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__10_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -2006,21 +2006,21 @@ define void @v_shuffle_v2bf16_v8bf16__11_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__11_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__11_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -2061,22 +2061,22 @@ define void @v_shuffle_v2bf16_v8bf16__12_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__12_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v4, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__12_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v4, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -2115,21 +2115,21 @@ define void @v_shuffle_v2bf16_v8bf16__13_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__13_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v4, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__13_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v4, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -2170,22 +2170,22 @@ define void @v_shuffle_v2bf16_v8bf16__14_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__14_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v5, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__14_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v5, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -2216,16 +2216,16 @@ define void @v_shuffle_v2bf16_v8bf16__u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2255,16 +2255,16 @@ define void @v_shuffle_v2bf16_v8bf16__0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2298,18 +2298,18 @@ define void @v_shuffle_v2bf16_v8bf16__1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2343,18 +2343,18 @@ define void @v_shuffle_v2bf16_v8bf16__2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v0 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2388,18 +2388,18 @@ define void @v_shuffle_v2bf16_v8bf16__3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2433,18 +2433,18 @@ define void @v_shuffle_v2bf16_v8bf16__4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v0 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v2, v0 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2478,18 +2478,18 @@ define void @v_shuffle_v2bf16_v8bf16__5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2523,18 +2523,18 @@ define void @v_shuffle_v2bf16_v8bf16__6_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2568,18 +2568,18 @@ define void @v_shuffle_v2bf16_v8bf16__7_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2609,16 +2609,16 @@ define void @v_shuffle_v2bf16_v8bf16__8_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__8_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__8_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2658,22 +2658,22 @@ define void @v_shuffle_v2bf16_v8bf16__9_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__9_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v2, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__9_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v2, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -2714,22 +2714,22 @@ define void @v_shuffle_v2bf16_v8bf16__10_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__10_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__10_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -2770,22 +2770,22 @@ define void @v_shuffle_v2bf16_v8bf16__11_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__11_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__11_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -2826,22 +2826,22 @@ define void @v_shuffle_v2bf16_v8bf16__12_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__12_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v4, v0 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__12_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v4, v0 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -2882,22 +2882,22 @@ define void @v_shuffle_v2bf16_v8bf16__13_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__13_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v4, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__13_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v4, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -2938,22 +2938,22 @@ define void @v_shuffle_v2bf16_v8bf16__14_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__14_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v5, v0 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__14_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v5, v0 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -2986,17 +2986,17 @@ define void @v_shuffle_v2bf16_v8bf16__u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3030,18 +3030,18 @@ define void @v_shuffle_v2bf16_v8bf16__0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3073,17 +3073,17 @@ define void @v_shuffle_v2bf16_v8bf16__1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3117,18 +3117,18 @@ define void @v_shuffle_v2bf16_v8bf16__2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3160,17 +3160,17 @@ define void @v_shuffle_v2bf16_v8bf16__3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v1, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3204,18 +3204,18 @@ define void @v_shuffle_v2bf16_v8bf16__4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3247,17 +3247,17 @@ define void @v_shuffle_v2bf16_v8bf16__5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3291,18 +3291,18 @@ define void @v_shuffle_v2bf16_v8bf16__6_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3334,17 +3334,17 @@ define void @v_shuffle_v2bf16_v8bf16__7_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3376,17 +3376,17 @@ define void @v_shuffle_v2bf16_v8bf16__8_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__8_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__8_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3424,21 +3424,21 @@ define void @v_shuffle_v2bf16_v8bf16__9_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__9_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v2, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__9_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v2, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -3479,22 +3479,22 @@ define void @v_shuffle_v2bf16_v8bf16__10_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__10_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__10_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -3533,21 +3533,21 @@ define void @v_shuffle_v2bf16_v8bf16__11_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__11_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__11_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v3, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -3588,22 +3588,22 @@ define void @v_shuffle_v2bf16_v8bf16__12_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__12_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v4, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__12_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v4, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -3642,21 +3642,21 @@ define void @v_shuffle_v2bf16_v8bf16__13_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__13_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v4, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__13_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v4, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -3697,22 +3697,22 @@ define void @v_shuffle_v2bf16_v8bf16__14_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__14_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v5, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__14_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v5, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -3743,16 +3743,16 @@ define void @v_shuffle_v2bf16_v8bf16__u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3786,18 +3786,18 @@ define void @v_shuffle_v2bf16_v8bf16__0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v1 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3831,18 +3831,18 @@ define void @v_shuffle_v2bf16_v8bf16__1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3872,16 +3872,16 @@ define void @v_shuffle_v2bf16_v8bf16__2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3915,18 +3915,18 @@ define void @v_shuffle_v2bf16_v8bf16__3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3960,18 +3960,18 @@ define void @v_shuffle_v2bf16_v8bf16__4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v2, v1 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4005,18 +4005,18 @@ define void @v_shuffle_v2bf16_v8bf16__5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4050,18 +4050,18 @@ define void @v_shuffle_v2bf16_v8bf16__6_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v1 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4095,18 +4095,18 @@ define void @v_shuffle_v2bf16_v8bf16__7_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4136,16 +4136,16 @@ define void @v_shuffle_v2bf16_v8bf16__8_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__8_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__8_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4185,22 +4185,22 @@ define void @v_shuffle_v2bf16_v8bf16__9_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__9_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v2, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__9_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v2, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -4241,22 +4241,22 @@ define void @v_shuffle_v2bf16_v8bf16__10_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__10_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v1 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__10_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v1 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -4297,22 +4297,22 @@ define void @v_shuffle_v2bf16_v8bf16__11_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__11_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__11_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -4353,22 +4353,22 @@ define void @v_shuffle_v2bf16_v8bf16__12_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__12_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v4, v1 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__12_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v4, v1 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -4409,22 +4409,22 @@ define void @v_shuffle_v2bf16_v8bf16__13_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__13_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v4, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__13_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v4, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -4465,22 +4465,22 @@ define void @v_shuffle_v2bf16_v8bf16__14_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__14_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v5, v1 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__14_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v5, v1 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -4513,17 +4513,17 @@ define void @v_shuffle_v2bf16_v8bf16__u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4557,18 +4557,18 @@ define void @v_shuffle_v2bf16_v8bf16__0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4600,17 +4600,17 @@ define void @v_shuffle_v2bf16_v8bf16__1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4644,18 +4644,18 @@ define void @v_shuffle_v2bf16_v8bf16__2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4687,17 +4687,17 @@ define void @v_shuffle_v2bf16_v8bf16__3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v1, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v1, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4731,18 +4731,18 @@ define void @v_shuffle_v2bf16_v8bf16__4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4774,17 +4774,17 @@ define void @v_shuffle_v2bf16_v8bf16__5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4818,18 +4818,18 @@ define void @v_shuffle_v2bf16_v8bf16__6_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4861,17 +4861,17 @@ define void @v_shuffle_v2bf16_v8bf16__7_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4903,17 +4903,17 @@ define void @v_shuffle_v2bf16_v8bf16__8_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__8_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__8_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4951,21 +4951,21 @@ define void @v_shuffle_v2bf16_v8bf16__9_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__9_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v4, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__9_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v4, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -5006,22 +5006,22 @@ define void @v_shuffle_v2bf16_v8bf16__10_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__10_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v5, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__10_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v5, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -5060,21 +5060,21 @@ define void @v_shuffle_v2bf16_v8bf16__11_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__11_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v5, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__11_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v5, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -5115,22 +5115,22 @@ define void @v_shuffle_v2bf16_v8bf16__12_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__12_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v6, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__12_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v6, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -5169,21 +5169,21 @@ define void @v_shuffle_v2bf16_v8bf16__13_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__13_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v6, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__13_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v6, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -5224,22 +5224,22 @@ define void @v_shuffle_v2bf16_v8bf16__14_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__14_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v7, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__14_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v7, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -5270,16 +5270,16 @@ define void @v_shuffle_v2bf16_v8bf16__u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -5313,18 +5313,18 @@ define void @v_shuffle_v2bf16_v8bf16__0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -5358,18 +5358,18 @@ define void @v_shuffle_v2bf16_v8bf16__1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -5403,18 +5403,18 @@ define void @v_shuffle_v2bf16_v8bf16__2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -5448,18 +5448,18 @@ define void @v_shuffle_v2bf16_v8bf16__3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -5489,16 +5489,16 @@ define void @v_shuffle_v2bf16_v8bf16__4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -5532,18 +5532,18 @@ define void @v_shuffle_v2bf16_v8bf16__5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -5577,18 +5577,18 @@ define void @v_shuffle_v2bf16_v8bf16__6_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -5622,18 +5622,18 @@ define void @v_shuffle_v2bf16_v8bf16__7_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -5663,16 +5663,16 @@ define void @v_shuffle_v2bf16_v8bf16__8_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__8_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__8_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -5712,22 +5712,22 @@ define void @v_shuffle_v2bf16_v8bf16__9_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__9_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v4, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__9_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v4, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -5768,22 +5768,22 @@ define void @v_shuffle_v2bf16_v8bf16__10_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__10_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v5, v2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__10_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v5, v2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -5824,22 +5824,22 @@ define void @v_shuffle_v2bf16_v8bf16__11_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__11_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v5, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__11_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v5, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -5880,22 +5880,22 @@ define void @v_shuffle_v2bf16_v8bf16__12_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__12_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v6, v2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__12_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v6, v2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -5936,22 +5936,22 @@ define void @v_shuffle_v2bf16_v8bf16__13_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__13_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v6, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__13_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v6, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -5992,22 +5992,22 @@ define void @v_shuffle_v2bf16_v8bf16__14_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__14_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v7, v2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__14_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v7, v2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -6040,17 +6040,17 @@ define void @v_shuffle_v2bf16_v8bf16__u_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6084,18 +6084,18 @@ define void @v_shuffle_v2bf16_v8bf16__0_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6127,17 +6127,17 @@ define void @v_shuffle_v2bf16_v8bf16__1_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6171,18 +6171,18 @@ define void @v_shuffle_v2bf16_v8bf16__2_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6214,17 +6214,17 @@ define void @v_shuffle_v2bf16_v8bf16__3_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v1, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v1, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6258,18 +6258,18 @@ define void @v_shuffle_v2bf16_v8bf16__4_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6301,17 +6301,17 @@ define void @v_shuffle_v2bf16_v8bf16__5_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6345,18 +6345,18 @@ define void @v_shuffle_v2bf16_v8bf16__6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6388,17 +6388,17 @@ define void @v_shuffle_v2bf16_v8bf16__7_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6430,17 +6430,17 @@ define void @v_shuffle_v2bf16_v8bf16__8_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__8_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__8_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6478,21 +6478,21 @@ define void @v_shuffle_v2bf16_v8bf16__9_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__9_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v4, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__9_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v4, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -6533,22 +6533,22 @@ define void @v_shuffle_v2bf16_v8bf16__10_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__10_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v5, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__10_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v5, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -6587,21 +6587,21 @@ define void @v_shuffle_v2bf16_v8bf16__11_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__11_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v5, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__11_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v5, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -6642,22 +6642,22 @@ define void @v_shuffle_v2bf16_v8bf16__12_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__12_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v6, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__12_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v6, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -6696,21 +6696,21 @@ define void @v_shuffle_v2bf16_v8bf16__13_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__13_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v6, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__13_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v6, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -6751,22 +6751,22 @@ define void @v_shuffle_v2bf16_v8bf16__14_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__14_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v7, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__14_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v7, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -6797,16 +6797,16 @@ define void @v_shuffle_v2bf16_v8bf16__u_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v3, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6840,18 +6840,18 @@ define void @v_shuffle_v2bf16_v8bf16__0_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6885,18 +6885,18 @@ define void @v_shuffle_v2bf16_v8bf16__1_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6930,18 +6930,18 @@ define void @v_shuffle_v2bf16_v8bf16__2_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6975,18 +6975,18 @@ define void @v_shuffle_v2bf16_v8bf16__3_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7020,18 +7020,18 @@ define void @v_shuffle_v2bf16_v8bf16__4_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v2, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7065,18 +7065,18 @@ define void @v_shuffle_v2bf16_v8bf16__5_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7106,16 +7106,16 @@ define void @v_shuffle_v2bf16_v8bf16__6_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v3, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7149,18 +7149,18 @@ define void @v_shuffle_v2bf16_v8bf16__7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7190,16 +7190,16 @@ define void @v_shuffle_v2bf16_v8bf16__8_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__8_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__8_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v3, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7239,22 +7239,22 @@ define void @v_shuffle_v2bf16_v8bf16__9_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__9_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v4, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__9_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v4, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -7295,22 +7295,22 @@ define void @v_shuffle_v2bf16_v8bf16__10_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__10_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v5, v3 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__10_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v5, v3 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -7351,22 +7351,22 @@ define void @v_shuffle_v2bf16_v8bf16__11_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__11_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v5, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__11_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v5, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -7407,22 +7407,22 @@ define void @v_shuffle_v2bf16_v8bf16__12_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__12_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v6, v3 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__12_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v6, v3 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -7463,22 +7463,22 @@ define void @v_shuffle_v2bf16_v8bf16__13_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__13_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v6, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__13_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v6, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -7519,22 +7519,22 @@ define void @v_shuffle_v2bf16_v8bf16__14_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__14_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v7, v3 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__14_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v7, v3 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -7576,16 +7576,16 @@ define void @v_shuffle_v2bf16_v8bf16__0_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__0_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__0_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7617,17 +7617,17 @@ define void @v_shuffle_v2bf16_v8bf16__1_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__1_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__1_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7657,16 +7657,16 @@ define void @v_shuffle_v2bf16_v8bf16__2_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__2_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__2_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7698,17 +7698,17 @@ define void @v_shuffle_v2bf16_v8bf16__3_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__3_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__3_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7738,16 +7738,16 @@ define void @v_shuffle_v2bf16_v8bf16__4_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__4_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__4_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7779,17 +7779,17 @@ define void @v_shuffle_v2bf16_v8bf16__5_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__5_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__5_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7819,16 +7819,16 @@ define void @v_shuffle_v2bf16_v8bf16__6_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__6_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__6_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v3, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7860,17 +7860,17 @@ define void @v_shuffle_v2bf16_v8bf16__7_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__7_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__7_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7913,17 +7913,17 @@ define void @v_shuffle_v2bf16_v8bf16__9_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__9_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__9_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -7958,18 +7958,18 @@ define void @v_shuffle_v2bf16_v8bf16__10_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__10_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__10_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -8002,17 +8002,17 @@ define void @v_shuffle_v2bf16_v8bf16__11_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__11_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__11_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v1, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -8047,18 +8047,18 @@ define void @v_shuffle_v2bf16_v8bf16__12_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__12_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__12_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -8091,17 +8091,17 @@ define void @v_shuffle_v2bf16_v8bf16__13_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__13_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__13_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -8136,18 +8136,18 @@ define void @v_shuffle_v2bf16_v8bf16__14_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__14_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__14_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -8178,16 +8178,16 @@ define void @v_shuffle_v2bf16_v8bf16__u_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__u_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__u_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -8228,22 +8228,22 @@ define void @v_shuffle_v2bf16_v8bf16__0_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__0_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__0_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -8284,22 +8284,22 @@ define void @v_shuffle_v2bf16_v8bf16__1_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__1_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__1_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v0, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -8340,22 +8340,22 @@ define void @v_shuffle_v2bf16_v8bf16__2_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__2_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__2_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -8396,22 +8396,22 @@ define void @v_shuffle_v2bf16_v8bf16__3_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__3_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v1, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__3_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v1, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -8452,22 +8452,22 @@ define void @v_shuffle_v2bf16_v8bf16__4_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__4_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v4 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__4_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v2, v4 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -8508,22 +8508,22 @@ define void @v_shuffle_v2bf16_v8bf16__5_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__5_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v4, v2, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__5_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v4, v2, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -8564,22 +8564,22 @@ define void @v_shuffle_v2bf16_v8bf16__6_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__6_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v4 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__6_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v4 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -8620,22 +8620,22 @@ define void @v_shuffle_v2bf16_v8bf16__7_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__7_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v4, v3, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__7_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v4, v3, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -8666,16 +8666,16 @@ define void @v_shuffle_v2bf16_v8bf16__8_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__8_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__8_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -8710,18 +8710,18 @@ define void @v_shuffle_v2bf16_v8bf16__9_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__9_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__9_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -8756,18 +8756,18 @@ define void @v_shuffle_v2bf16_v8bf16__10_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__10_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__10_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v0 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -8802,18 +8802,18 @@ define void @v_shuffle_v2bf16_v8bf16__11_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__11_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__11_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -8848,18 +8848,18 @@ define void @v_shuffle_v2bf16_v8bf16__12_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__12_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v0 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__12_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v2, v0 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -8894,18 +8894,18 @@ define void @v_shuffle_v2bf16_v8bf16__13_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__13_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__13_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -8940,18 +8940,18 @@ define void @v_shuffle_v2bf16_v8bf16__14_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__14_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__14_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -8984,17 +8984,17 @@ define void @v_shuffle_v2bf16_v8bf16__u_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__u_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__u_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -9035,22 +9035,22 @@ define void @v_shuffle_v2bf16_v8bf16__0_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__0_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__0_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -9089,21 +9089,21 @@ define void @v_shuffle_v2bf16_v8bf16__1_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__1_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__1_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v0, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -9144,22 +9144,22 @@ define void @v_shuffle_v2bf16_v8bf16__2_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__2_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__2_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -9198,21 +9198,21 @@ define void @v_shuffle_v2bf16_v8bf16__3_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__3_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v1, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__3_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v1, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -9253,22 +9253,22 @@ define void @v_shuffle_v2bf16_v8bf16__4_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__4_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v5, v2, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__4_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v5, v2, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -9307,21 +9307,21 @@ define void @v_shuffle_v2bf16_v8bf16__5_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__5_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v5, v2, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__5_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v5, v2, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -9362,22 +9362,22 @@ define void @v_shuffle_v2bf16_v8bf16__6_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__6_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v5, v3, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__6_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v5, v3, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -9416,21 +9416,21 @@ define void @v_shuffle_v2bf16_v8bf16__7_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__7_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v5, v3, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__7_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v5, v3, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -9465,18 +9465,18 @@ define void @v_shuffle_v2bf16_v8bf16__8_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__8_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__8_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -9509,17 +9509,17 @@ define void @v_shuffle_v2bf16_v8bf16__9_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__9_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__9_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -9554,18 +9554,18 @@ define void @v_shuffle_v2bf16_v8bf16__10_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__10_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__10_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -9598,17 +9598,17 @@ define void @v_shuffle_v2bf16_v8bf16__11_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__11_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__11_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v1, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -9643,18 +9643,18 @@ define void @v_shuffle_v2bf16_v8bf16__12_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__12_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__12_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -9687,17 +9687,17 @@ define void @v_shuffle_v2bf16_v8bf16__13_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__13_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__13_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -9732,18 +9732,18 @@ define void @v_shuffle_v2bf16_v8bf16__14_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__14_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__14_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -9774,16 +9774,16 @@ define void @v_shuffle_v2bf16_v8bf16__u_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__u_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__u_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -9824,22 +9824,22 @@ define void @v_shuffle_v2bf16_v8bf16__0_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__0_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v3 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__0_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v3 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -9880,22 +9880,22 @@ define void @v_shuffle_v2bf16_v8bf16__1_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__1_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__1_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -9936,22 +9936,22 @@ define void @v_shuffle_v2bf16_v8bf16__2_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__2_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v3 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__2_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v3 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -9992,22 +9992,22 @@ define void @v_shuffle_v2bf16_v8bf16__3_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__3_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__3_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -10048,22 +10048,22 @@ define void @v_shuffle_v2bf16_v8bf16__4_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__4_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v5 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__4_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v2, v5 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -10104,22 +10104,22 @@ define void @v_shuffle_v2bf16_v8bf16__5_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__5_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v5, v2, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__5_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v5, v2, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -10160,22 +10160,22 @@ define void @v_shuffle_v2bf16_v8bf16__6_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__6_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v5 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__6_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v5 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -10216,22 +10216,22 @@ define void @v_shuffle_v2bf16_v8bf16__7_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__7_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v5, v3, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__7_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v5, v3, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -10266,18 +10266,18 @@ define void @v_shuffle_v2bf16_v8bf16__8_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__8_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__8_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v1 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -10312,18 +10312,18 @@ define void @v_shuffle_v2bf16_v8bf16__9_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__9_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__9_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -10354,16 +10354,16 @@ define void @v_shuffle_v2bf16_v8bf16__10_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__10_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__10_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -10398,18 +10398,18 @@ define void @v_shuffle_v2bf16_v8bf16__11_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__11_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__11_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -10444,18 +10444,18 @@ define void @v_shuffle_v2bf16_v8bf16__12_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__12_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__12_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v2, v1 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -10490,18 +10490,18 @@ define void @v_shuffle_v2bf16_v8bf16__13_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__13_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__13_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -10536,18 +10536,18 @@ define void @v_shuffle_v2bf16_v8bf16__14_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__14_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__14_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v1 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -10580,17 +10580,17 @@ define void @v_shuffle_v2bf16_v8bf16__u_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__u_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__u_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -10631,22 +10631,22 @@ define void @v_shuffle_v2bf16_v8bf16__0_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__0_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v4, v0, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__0_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v4, v0, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -10685,21 +10685,21 @@ define void @v_shuffle_v2bf16_v8bf16__1_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__1_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v4, v0, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__1_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v4, v0, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -10740,22 +10740,22 @@ define void @v_shuffle_v2bf16_v8bf16__2_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__2_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v4, v1, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__2_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v4, v1, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -10794,21 +10794,21 @@ define void @v_shuffle_v2bf16_v8bf16__3_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__3_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v4, v1, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__3_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v4, v1, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -10849,22 +10849,22 @@ define void @v_shuffle_v2bf16_v8bf16__4_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__4_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v6, v2, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__4_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v6, v2, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -10903,21 +10903,21 @@ define void @v_shuffle_v2bf16_v8bf16__5_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__5_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v6, v2, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__5_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v6, v2, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -10958,22 +10958,22 @@ define void @v_shuffle_v2bf16_v8bf16__6_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__6_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v6, v3, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__6_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v6, v3, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -11012,21 +11012,21 @@ define void @v_shuffle_v2bf16_v8bf16__7_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__7_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v6, v3, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__7_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v6, v3, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -11061,18 +11061,18 @@ define void @v_shuffle_v2bf16_v8bf16__8_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__8_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__8_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -11105,17 +11105,17 @@ define void @v_shuffle_v2bf16_v8bf16__9_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__9_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__9_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -11150,18 +11150,18 @@ define void @v_shuffle_v2bf16_v8bf16__10_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__10_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__10_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -11194,17 +11194,17 @@ define void @v_shuffle_v2bf16_v8bf16__11_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__11_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v1, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__11_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v1, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -11239,18 +11239,18 @@ define void @v_shuffle_v2bf16_v8bf16__12_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__12_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__12_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -11283,17 +11283,17 @@ define void @v_shuffle_v2bf16_v8bf16__13_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__13_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__13_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -11328,18 +11328,18 @@ define void @v_shuffle_v2bf16_v8bf16__14_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__14_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__14_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -11370,16 +11370,16 @@ define void @v_shuffle_v2bf16_v8bf16__u_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__u_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__u_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -11420,22 +11420,22 @@ define void @v_shuffle_v2bf16_v8bf16__0_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__0_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v4 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__0_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v4 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -11476,22 +11476,22 @@ define void @v_shuffle_v2bf16_v8bf16__1_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__1_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v4, v0, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__1_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v4, v0, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -11532,22 +11532,22 @@ define void @v_shuffle_v2bf16_v8bf16__2_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__2_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v4 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__2_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v4 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -11588,22 +11588,22 @@ define void @v_shuffle_v2bf16_v8bf16__3_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__3_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v4, v1, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__3_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v4, v1, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -11644,22 +11644,22 @@ define void @v_shuffle_v2bf16_v8bf16__4_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__4_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v6 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__4_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v2, v6 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -11700,22 +11700,22 @@ define void @v_shuffle_v2bf16_v8bf16__5_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__5_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v6, v2, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__5_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v6, v2, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -11756,22 +11756,22 @@ define void @v_shuffle_v2bf16_v8bf16__6_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__6_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v6 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__6_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v6 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -11812,22 +11812,22 @@ define void @v_shuffle_v2bf16_v8bf16__7_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__7_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v6, v3, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__7_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v6, v3, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -11862,18 +11862,18 @@ define void @v_shuffle_v2bf16_v8bf16__8_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__8_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__8_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -11908,18 +11908,18 @@ define void @v_shuffle_v2bf16_v8bf16__9_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__9_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__9_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -11954,18 +11954,18 @@ define void @v_shuffle_v2bf16_v8bf16__10_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__10_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__10_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -12000,18 +12000,18 @@ define void @v_shuffle_v2bf16_v8bf16__11_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__11_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__11_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -12042,16 +12042,16 @@ define void @v_shuffle_v2bf16_v8bf16__12_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__12_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__12_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -12086,18 +12086,18 @@ define void @v_shuffle_v2bf16_v8bf16__13_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__13_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__13_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -12132,18 +12132,18 @@ define void @v_shuffle_v2bf16_v8bf16__14_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__14_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__14_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -12176,17 +12176,17 @@ define void @v_shuffle_v2bf16_v8bf16__u_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__u_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__u_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -12227,22 +12227,22 @@ define void @v_shuffle_v2bf16_v8bf16__0_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__0_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v5, v0, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__0_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v5, v0, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -12281,21 +12281,21 @@ define void @v_shuffle_v2bf16_v8bf16__1_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__1_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v5, v0, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__1_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v5, v0, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -12336,22 +12336,22 @@ define void @v_shuffle_v2bf16_v8bf16__2_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__2_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v5, v1, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__2_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v5, v1, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -12390,21 +12390,21 @@ define void @v_shuffle_v2bf16_v8bf16__3_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__3_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v5, v1, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__3_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v5, v1, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -12445,22 +12445,22 @@ define void @v_shuffle_v2bf16_v8bf16__4_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__4_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v7, v2, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__4_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v7, v2, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -12499,21 +12499,21 @@ define void @v_shuffle_v2bf16_v8bf16__5_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__5_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v7, v2, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__5_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v7, v2, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -12554,22 +12554,22 @@ define void @v_shuffle_v2bf16_v8bf16__6_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__6_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v7, v3, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__6_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v7, v3, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -12608,21 +12608,21 @@ define void @v_shuffle_v2bf16_v8bf16__7_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__7_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v7, v3, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__7_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v7, v3, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -12657,18 +12657,18 @@ define void @v_shuffle_v2bf16_v8bf16__8_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__8_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__8_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -12701,17 +12701,17 @@ define void @v_shuffle_v2bf16_v8bf16__9_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__9_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__9_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -12746,18 +12746,18 @@ define void @v_shuffle_v2bf16_v8bf16__10_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__10_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__10_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -12790,17 +12790,17 @@ define void @v_shuffle_v2bf16_v8bf16__11_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__11_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v1, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__11_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v1, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -12835,18 +12835,18 @@ define void @v_shuffle_v2bf16_v8bf16__12_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__12_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__12_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -12879,17 +12879,17 @@ define void @v_shuffle_v2bf16_v8bf16__13_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__13_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__13_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -12924,18 +12924,18 @@ define void @v_shuffle_v2bf16_v8bf16__14_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__14_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__14_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -12966,16 +12966,16 @@ define void @v_shuffle_v2bf16_v8bf16__u_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__u_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__u_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v3, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -13016,22 +13016,22 @@ define void @v_shuffle_v2bf16_v8bf16__0_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__0_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v5 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__0_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v5 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -13072,22 +13072,22 @@ define void @v_shuffle_v2bf16_v8bf16__1_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__1_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v5, v0, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__1_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v5, v0, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -13128,22 +13128,22 @@ define void @v_shuffle_v2bf16_v8bf16__2_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__2_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v5 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__2_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v5 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -13184,22 +13184,22 @@ define void @v_shuffle_v2bf16_v8bf16__3_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__3_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v5, v1, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__3_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v5, v1, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -13240,22 +13240,22 @@ define void @v_shuffle_v2bf16_v8bf16__4_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__4_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v7 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__4_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v2, v7 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -13296,22 +13296,22 @@ define void @v_shuffle_v2bf16_v8bf16__5_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__5_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v7, v2, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__5_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v7, v2, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -13352,22 +13352,22 @@ define void @v_shuffle_v2bf16_v8bf16__6_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__6_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v7 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__6_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v7 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -13408,22 +13408,22 @@ define void @v_shuffle_v2bf16_v8bf16__7_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__7_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v7, v3, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__7_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v7, v3, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -13458,18 +13458,18 @@ define void @v_shuffle_v2bf16_v8bf16__8_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__8_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__8_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -13504,18 +13504,18 @@ define void @v_shuffle_v2bf16_v8bf16__9_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__9_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__9_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -13550,18 +13550,18 @@ define void @v_shuffle_v2bf16_v8bf16__10_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__10_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__10_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -13596,18 +13596,18 @@ define void @v_shuffle_v2bf16_v8bf16__11_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__11_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__11_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -13642,18 +13642,18 @@ define void @v_shuffle_v2bf16_v8bf16__12_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__12_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__12_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v2, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -13688,18 +13688,18 @@ define void @v_shuffle_v2bf16_v8bf16__13_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__13_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__13_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -13730,16 +13730,16 @@ define void @v_shuffle_v2bf16_v8bf16__14_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2bf16_v8bf16__14_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2bf16_v8bf16__14_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v3, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=v"() %vec1 = call <8 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -13784,17 +13784,17 @@ define void @s_shuffle_v2bf16_v8bf16__0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -13826,17 +13826,17 @@ define void @s_shuffle_v2bf16_v8bf16__1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -13868,17 +13868,17 @@ define void @s_shuffle_v2bf16_v8bf16__2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -13910,17 +13910,17 @@ define void @s_shuffle_v2bf16_v8bf16__3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -13952,17 +13952,17 @@ define void @s_shuffle_v2bf16_v8bf16__4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -13994,17 +13994,17 @@ define void @s_shuffle_v2bf16_v8bf16__5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -14036,17 +14036,17 @@ define void @s_shuffle_v2bf16_v8bf16__6_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -14078,17 +14078,17 @@ define void @s_shuffle_v2bf16_v8bf16__7_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -14134,17 +14134,17 @@ define void @s_shuffle_v2bf16_v8bf16__9_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__9_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__9_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -14177,17 +14177,17 @@ define void @s_shuffle_v2bf16_v8bf16__10_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__10_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__10_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -14220,17 +14220,17 @@ define void @s_shuffle_v2bf16_v8bf16__11_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__11_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__11_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -14263,17 +14263,17 @@ define void @s_shuffle_v2bf16_v8bf16__12_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__12_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__12_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -14306,17 +14306,17 @@ define void @s_shuffle_v2bf16_v8bf16__13_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__13_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__13_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -14349,17 +14349,17 @@ define void @s_shuffle_v2bf16_v8bf16__14_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__14_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__14_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -14392,17 +14392,17 @@ define void @s_shuffle_v2bf16_v8bf16__15_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__15_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__15_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -14443,21 +14443,21 @@ define void @s_shuffle_v2bf16_v8bf16__15_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__15_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s7, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__15_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s7, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -14500,22 +14500,22 @@ define void @s_shuffle_v2bf16_v8bf16__15_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__15_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s7, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__15_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s7, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -14556,21 +14556,21 @@ define void @s_shuffle_v2bf16_v8bf16__15_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__15_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s7, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__15_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s7, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -14613,22 +14613,22 @@ define void @s_shuffle_v2bf16_v8bf16__15_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__15_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s7, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__15_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s7, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -14669,21 +14669,21 @@ define void @s_shuffle_v2bf16_v8bf16__15_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__15_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s7, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__15_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s7, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -14726,22 +14726,22 @@ define void @s_shuffle_v2bf16_v8bf16__15_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__15_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_lshr_b32 s1, s7, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__15_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_lshr_b32 s1, s7, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -14782,21 +14782,21 @@ define void @s_shuffle_v2bf16_v8bf16__15_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__15_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s7, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__15_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s7, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -14839,22 +14839,22 @@ define void @s_shuffle_v2bf16_v8bf16__15_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__15_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_lshr_b32 s1, s7, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__15_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_lshr_b32 s1, s7, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -14889,18 +14889,18 @@ define void @s_shuffle_v2bf16_v8bf16__15_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__15_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__15_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -14937,19 +14937,19 @@ define void @s_shuffle_v2bf16_v8bf16__15_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__15_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__15_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -14984,18 +14984,18 @@ define void @s_shuffle_v2bf16_v8bf16__15_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__15_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__15_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -15032,19 +15032,19 @@ define void @s_shuffle_v2bf16_v8bf16__15_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__15_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__15_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -15079,18 +15079,18 @@ define void @s_shuffle_v2bf16_v8bf16__15_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__15_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__15_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -15127,19 +15127,19 @@ define void @s_shuffle_v2bf16_v8bf16__15_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__15_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__15_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -15174,18 +15174,18 @@ define void @s_shuffle_v2bf16_v8bf16__15_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__15_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__15_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -15220,18 +15220,18 @@ define void @s_shuffle_v2bf16_v8bf16__15_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__15_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__15_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -15264,17 +15264,17 @@ define void @s_shuffle_v2bf16_v8bf16__u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -15306,17 +15306,17 @@ define void @s_shuffle_v2bf16_v8bf16__0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> zeroinitializer call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -15350,18 +15350,18 @@ define void @s_shuffle_v2bf16_v8bf16__1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -15393,17 +15393,17 @@ define void @s_shuffle_v2bf16_v8bf16__2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -15437,18 +15437,18 @@ define void @s_shuffle_v2bf16_v8bf16__3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -15480,17 +15480,17 @@ define void @s_shuffle_v2bf16_v8bf16__4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -15524,18 +15524,18 @@ define void @s_shuffle_v2bf16_v8bf16__5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -15567,17 +15567,17 @@ define void @s_shuffle_v2bf16_v8bf16__6_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -15611,18 +15611,18 @@ define void @s_shuffle_v2bf16_v8bf16__7_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -15654,17 +15654,17 @@ define void @s_shuffle_v2bf16_v8bf16__8_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__8_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__8_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -15704,21 +15704,21 @@ define void @s_shuffle_v2bf16_v8bf16__9_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__9_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s4, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__9_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s4, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -15757,20 +15757,20 @@ define void @s_shuffle_v2bf16_v8bf16__10_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__10_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s5, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__10_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s5, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -15811,21 +15811,21 @@ define void @s_shuffle_v2bf16_v8bf16__11_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__11_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s5, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__11_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s5, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -15864,20 +15864,20 @@ define void @s_shuffle_v2bf16_v8bf16__12_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__12_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s6, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__12_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s6, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -15918,21 +15918,21 @@ define void @s_shuffle_v2bf16_v8bf16__13_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__13_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s6, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__13_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s6, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -15971,20 +15971,20 @@ define void @s_shuffle_v2bf16_v8bf16__14_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__14_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s7, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__14_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s7, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -16015,17 +16015,17 @@ define void @s_shuffle_v2bf16_v8bf16__u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -16055,17 +16055,17 @@ define void @s_shuffle_v2bf16_v8bf16__0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -16099,18 +16099,18 @@ define void @s_shuffle_v2bf16_v8bf16__1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -16144,18 +16144,18 @@ define void @s_shuffle_v2bf16_v8bf16__2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -16191,19 +16191,19 @@ define void @s_shuffle_v2bf16_v8bf16__3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -16237,18 +16237,18 @@ define void @s_shuffle_v2bf16_v8bf16__4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -16284,19 +16284,19 @@ define void @s_shuffle_v2bf16_v8bf16__5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -16330,18 +16330,18 @@ define void @s_shuffle_v2bf16_v8bf16__6_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -16377,19 +16377,19 @@ define void @s_shuffle_v2bf16_v8bf16__7_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -16419,17 +16419,17 @@ define void @s_shuffle_v2bf16_v8bf16__8_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__8_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__8_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -16471,22 +16471,22 @@ define void @s_shuffle_v2bf16_v8bf16__9_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__9_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s4, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__9_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s4, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -16527,21 +16527,21 @@ define void @s_shuffle_v2bf16_v8bf16__10_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__10_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s5, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__10_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s5, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -16584,22 +16584,22 @@ define void @s_shuffle_v2bf16_v8bf16__11_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__11_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s5, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__11_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s5, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -16640,21 +16640,21 @@ define void @s_shuffle_v2bf16_v8bf16__12_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__12_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s6, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__12_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s6, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -16697,22 +16697,22 @@ define void @s_shuffle_v2bf16_v8bf16__13_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__13_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s6, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__13_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s6, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -16753,21 +16753,21 @@ define void @s_shuffle_v2bf16_v8bf16__14_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__14_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s7, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__14_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s7, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -16800,17 +16800,17 @@ define void @s_shuffle_v2bf16_v8bf16__u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -16842,17 +16842,17 @@ define void @s_shuffle_v2bf16_v8bf16__0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -16886,18 +16886,18 @@ define void @s_shuffle_v2bf16_v8bf16__1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -16929,17 +16929,17 @@ define void @s_shuffle_v2bf16_v8bf16__2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -16973,18 +16973,18 @@ define void @s_shuffle_v2bf16_v8bf16__3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -17016,17 +17016,17 @@ define void @s_shuffle_v2bf16_v8bf16__4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -17060,18 +17060,18 @@ define void @s_shuffle_v2bf16_v8bf16__5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -17103,17 +17103,17 @@ define void @s_shuffle_v2bf16_v8bf16__6_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -17147,18 +17147,18 @@ define void @s_shuffle_v2bf16_v8bf16__7_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -17190,17 +17190,17 @@ define void @s_shuffle_v2bf16_v8bf16__8_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__8_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__8_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -17240,21 +17240,21 @@ define void @s_shuffle_v2bf16_v8bf16__9_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__9_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s4, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__9_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s4, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -17293,20 +17293,20 @@ define void @s_shuffle_v2bf16_v8bf16__10_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__10_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s5, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__10_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s5, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -17347,21 +17347,21 @@ define void @s_shuffle_v2bf16_v8bf16__11_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__11_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s5, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__11_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s5, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -17400,20 +17400,20 @@ define void @s_shuffle_v2bf16_v8bf16__12_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__12_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s6, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__12_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s6, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -17454,21 +17454,21 @@ define void @s_shuffle_v2bf16_v8bf16__13_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__13_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s6, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__13_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s6, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -17507,20 +17507,20 @@ define void @s_shuffle_v2bf16_v8bf16__14_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__14_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s7, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__14_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s7, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -17553,17 +17553,17 @@ define void @s_shuffle_v2bf16_v8bf16__u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -17597,18 +17597,18 @@ define void @s_shuffle_v2bf16_v8bf16__0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -17644,19 +17644,19 @@ define void @s_shuffle_v2bf16_v8bf16__1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -17688,17 +17688,17 @@ define void @s_shuffle_v2bf16_v8bf16__2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -17732,18 +17732,18 @@ define void @s_shuffle_v2bf16_v8bf16__3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -17777,18 +17777,18 @@ define void @s_shuffle_v2bf16_v8bf16__4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -17824,19 +17824,19 @@ define void @s_shuffle_v2bf16_v8bf16__5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -17870,18 +17870,18 @@ define void @s_shuffle_v2bf16_v8bf16__6_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -17917,19 +17917,19 @@ define void @s_shuffle_v2bf16_v8bf16__7_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -17961,17 +17961,17 @@ define void @s_shuffle_v2bf16_v8bf16__8_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__8_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__8_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -18013,22 +18013,22 @@ define void @s_shuffle_v2bf16_v8bf16__9_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__9_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s4, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__9_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s4, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -18069,21 +18069,21 @@ define void @s_shuffle_v2bf16_v8bf16__10_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__10_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s5, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__10_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s5, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -18126,22 +18126,22 @@ define void @s_shuffle_v2bf16_v8bf16__11_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__11_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s5, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__11_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s5, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -18182,21 +18182,21 @@ define void @s_shuffle_v2bf16_v8bf16__12_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__12_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s6, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__12_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s6, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -18239,22 +18239,22 @@ define void @s_shuffle_v2bf16_v8bf16__13_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__13_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s6, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__13_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s6, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -18295,21 +18295,21 @@ define void @s_shuffle_v2bf16_v8bf16__14_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__14_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s7, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__14_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s7, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -18342,17 +18342,17 @@ define void @s_shuffle_v2bf16_v8bf16__u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -18384,17 +18384,17 @@ define void @s_shuffle_v2bf16_v8bf16__0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -18428,18 +18428,18 @@ define void @s_shuffle_v2bf16_v8bf16__1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -18471,17 +18471,17 @@ define void @s_shuffle_v2bf16_v8bf16__2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -18515,18 +18515,18 @@ define void @s_shuffle_v2bf16_v8bf16__3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -18558,17 +18558,17 @@ define void @s_shuffle_v2bf16_v8bf16__4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -18602,18 +18602,18 @@ define void @s_shuffle_v2bf16_v8bf16__5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -18645,17 +18645,17 @@ define void @s_shuffle_v2bf16_v8bf16__6_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -18689,18 +18689,18 @@ define void @s_shuffle_v2bf16_v8bf16__7_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -18732,17 +18732,17 @@ define void @s_shuffle_v2bf16_v8bf16__8_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__8_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__8_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -18782,21 +18782,21 @@ define void @s_shuffle_v2bf16_v8bf16__9_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__9_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s4, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__9_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s4, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -18835,20 +18835,20 @@ define void @s_shuffle_v2bf16_v8bf16__10_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__10_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s5, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__10_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s5, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -18889,21 +18889,21 @@ define void @s_shuffle_v2bf16_v8bf16__11_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__11_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s5, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__11_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s5, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -18942,20 +18942,20 @@ define void @s_shuffle_v2bf16_v8bf16__12_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__12_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s6, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__12_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s6, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -18996,21 +18996,21 @@ define void @s_shuffle_v2bf16_v8bf16__13_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__13_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s6, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__13_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s6, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -19049,20 +19049,20 @@ define void @s_shuffle_v2bf16_v8bf16__14_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__14_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s7, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__14_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s7, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -19095,17 +19095,17 @@ define void @s_shuffle_v2bf16_v8bf16__u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -19139,18 +19139,18 @@ define void @s_shuffle_v2bf16_v8bf16__0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -19186,19 +19186,19 @@ define void @s_shuffle_v2bf16_v8bf16__1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -19232,18 +19232,18 @@ define void @s_shuffle_v2bf16_v8bf16__2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -19279,19 +19279,19 @@ define void @s_shuffle_v2bf16_v8bf16__3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -19323,17 +19323,17 @@ define void @s_shuffle_v2bf16_v8bf16__4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -19367,18 +19367,18 @@ define void @s_shuffle_v2bf16_v8bf16__5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -19412,18 +19412,18 @@ define void @s_shuffle_v2bf16_v8bf16__6_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -19459,19 +19459,19 @@ define void @s_shuffle_v2bf16_v8bf16__7_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -19503,17 +19503,17 @@ define void @s_shuffle_v2bf16_v8bf16__8_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__8_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__8_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -19555,22 +19555,22 @@ define void @s_shuffle_v2bf16_v8bf16__9_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__9_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_lshr_b32 s1, s4, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__9_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_lshr_b32 s1, s4, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -19611,21 +19611,21 @@ define void @s_shuffle_v2bf16_v8bf16__10_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__10_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s5, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__10_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s5, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -19668,22 +19668,22 @@ define void @s_shuffle_v2bf16_v8bf16__11_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__11_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_lshr_b32 s1, s5, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__11_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_lshr_b32 s1, s5, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -19724,21 +19724,21 @@ define void @s_shuffle_v2bf16_v8bf16__12_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__12_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s6, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__12_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s6, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -19781,22 +19781,22 @@ define void @s_shuffle_v2bf16_v8bf16__13_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__13_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_lshr_b32 s1, s6, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__13_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_lshr_b32 s1, s6, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -19837,21 +19837,21 @@ define void @s_shuffle_v2bf16_v8bf16__14_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__14_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s7, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__14_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s7, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -19884,17 +19884,17 @@ define void @s_shuffle_v2bf16_v8bf16__u_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -19926,17 +19926,17 @@ define void @s_shuffle_v2bf16_v8bf16__0_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -19970,18 +19970,18 @@ define void @s_shuffle_v2bf16_v8bf16__1_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -20013,17 +20013,17 @@ define void @s_shuffle_v2bf16_v8bf16__2_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -20057,18 +20057,18 @@ define void @s_shuffle_v2bf16_v8bf16__3_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -20100,17 +20100,17 @@ define void @s_shuffle_v2bf16_v8bf16__4_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -20144,18 +20144,18 @@ define void @s_shuffle_v2bf16_v8bf16__5_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -20187,17 +20187,17 @@ define void @s_shuffle_v2bf16_v8bf16__6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -20231,18 +20231,18 @@ define void @s_shuffle_v2bf16_v8bf16__7_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -20274,17 +20274,17 @@ define void @s_shuffle_v2bf16_v8bf16__8_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__8_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__8_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -20324,21 +20324,21 @@ define void @s_shuffle_v2bf16_v8bf16__9_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__9_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s4, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__9_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s4, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -20377,20 +20377,20 @@ define void @s_shuffle_v2bf16_v8bf16__10_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__10_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s5, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__10_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s5, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -20431,21 +20431,21 @@ define void @s_shuffle_v2bf16_v8bf16__11_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__11_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s5, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__11_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s5, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -20484,20 +20484,20 @@ define void @s_shuffle_v2bf16_v8bf16__12_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__12_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s6, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__12_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s6, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -20538,21 +20538,21 @@ define void @s_shuffle_v2bf16_v8bf16__13_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__13_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s6, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__13_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s6, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -20591,20 +20591,20 @@ define void @s_shuffle_v2bf16_v8bf16__14_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__14_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s7, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__14_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s7, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -20637,17 +20637,17 @@ define void @s_shuffle_v2bf16_v8bf16__u_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -20681,18 +20681,18 @@ define void @s_shuffle_v2bf16_v8bf16__0_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -20728,19 +20728,19 @@ define void @s_shuffle_v2bf16_v8bf16__1_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -20774,18 +20774,18 @@ define void @s_shuffle_v2bf16_v8bf16__2_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -20821,19 +20821,19 @@ define void @s_shuffle_v2bf16_v8bf16__3_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -20867,18 +20867,18 @@ define void @s_shuffle_v2bf16_v8bf16__4_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -20914,19 +20914,19 @@ define void @s_shuffle_v2bf16_v8bf16__5_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -20958,17 +20958,17 @@ define void @s_shuffle_v2bf16_v8bf16__6_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -21002,18 +21002,18 @@ define void @s_shuffle_v2bf16_v8bf16__7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -21045,17 +21045,17 @@ define void @s_shuffle_v2bf16_v8bf16__8_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__8_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__8_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -21097,22 +21097,22 @@ define void @s_shuffle_v2bf16_v8bf16__9_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__9_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_lshr_b32 s1, s4, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__9_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_lshr_b32 s1, s4, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -21153,21 +21153,21 @@ define void @s_shuffle_v2bf16_v8bf16__10_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__10_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s5, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__10_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s5, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -21210,22 +21210,22 @@ define void @s_shuffle_v2bf16_v8bf16__11_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__11_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_lshr_b32 s1, s5, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__11_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_lshr_b32 s1, s5, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -21266,21 +21266,21 @@ define void @s_shuffle_v2bf16_v8bf16__12_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__12_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s6, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__12_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s6, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -21323,22 +21323,22 @@ define void @s_shuffle_v2bf16_v8bf16__13_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__13_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_lshr_b32 s1, s6, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__13_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_lshr_b32 s1, s6, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -21379,21 +21379,21 @@ define void @s_shuffle_v2bf16_v8bf16__14_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__14_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s7, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__14_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s7, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -21438,17 +21438,17 @@ define void @s_shuffle_v2bf16_v8bf16__0_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__0_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__0_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -21480,17 +21480,17 @@ define void @s_shuffle_v2bf16_v8bf16__1_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__1_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__1_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -21522,17 +21522,17 @@ define void @s_shuffle_v2bf16_v8bf16__2_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__2_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__2_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -21564,17 +21564,17 @@ define void @s_shuffle_v2bf16_v8bf16__3_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__3_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__3_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -21606,17 +21606,17 @@ define void @s_shuffle_v2bf16_v8bf16__4_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__4_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__4_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -21648,17 +21648,17 @@ define void @s_shuffle_v2bf16_v8bf16__5_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__5_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__5_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -21690,17 +21690,17 @@ define void @s_shuffle_v2bf16_v8bf16__6_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__6_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__6_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -21732,17 +21732,17 @@ define void @s_shuffle_v2bf16_v8bf16__7_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__7_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__7_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) @@ -21790,18 +21790,18 @@ define void @s_shuffle_v2bf16_v8bf16__9_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__9_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__9_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -21834,17 +21834,17 @@ define void @s_shuffle_v2bf16_v8bf16__10_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__10_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__10_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -21879,18 +21879,18 @@ define void @s_shuffle_v2bf16_v8bf16__11_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__11_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__11_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -21923,17 +21923,17 @@ define void @s_shuffle_v2bf16_v8bf16__12_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__12_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__12_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -21968,18 +21968,18 @@ define void @s_shuffle_v2bf16_v8bf16__13_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__13_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__13_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -22012,17 +22012,17 @@ define void @s_shuffle_v2bf16_v8bf16__14_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__14_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__14_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -22053,17 +22053,17 @@ define void @s_shuffle_v2bf16_v8bf16__u_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__u_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__u_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -22104,21 +22104,21 @@ define void @s_shuffle_v2bf16_v8bf16__0_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__0_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s4, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__0_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s4, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -22161,22 +22161,22 @@ define void @s_shuffle_v2bf16_v8bf16__1_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__1_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s4, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__1_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s4, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -22217,21 +22217,21 @@ define void @s_shuffle_v2bf16_v8bf16__2_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__2_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s4, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__2_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s4, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -22274,22 +22274,22 @@ define void @s_shuffle_v2bf16_v8bf16__3_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__3_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s4, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__3_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s4, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -22330,21 +22330,21 @@ define void @s_shuffle_v2bf16_v8bf16__4_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__4_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s4, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__4_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s4, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -22387,22 +22387,22 @@ define void @s_shuffle_v2bf16_v8bf16__5_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__5_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s4, 16 -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__5_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s4, 16 +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -22443,21 +22443,21 @@ define void @s_shuffle_v2bf16_v8bf16__6_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__6_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s4, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__6_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s4, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -22500,22 +22500,22 @@ define void @s_shuffle_v2bf16_v8bf16__7_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__7_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s4, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__7_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s4, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -22546,17 +22546,17 @@ define void @s_shuffle_v2bf16_v8bf16__8_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__8_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__8_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -22591,18 +22591,18 @@ define void @s_shuffle_v2bf16_v8bf16__9_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__9_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__9_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -22637,18 +22637,18 @@ define void @s_shuffle_v2bf16_v8bf16__10_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__10_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__10_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -22685,19 +22685,19 @@ define void @s_shuffle_v2bf16_v8bf16__11_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__11_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__11_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -22732,18 +22732,18 @@ define void @s_shuffle_v2bf16_v8bf16__12_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__12_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__12_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -22780,19 +22780,19 @@ define void @s_shuffle_v2bf16_v8bf16__13_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__13_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__13_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -22827,18 +22827,18 @@ define void @s_shuffle_v2bf16_v8bf16__14_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__14_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__14_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -22871,17 +22871,17 @@ define void @s_shuffle_v2bf16_v8bf16__u_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__u_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__u_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -22920,20 +22920,20 @@ define void @s_shuffle_v2bf16_v8bf16__0_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__0_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__0_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -22974,21 +22974,21 @@ define void @s_shuffle_v2bf16_v8bf16__1_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__1_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__1_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -23027,20 +23027,20 @@ define void @s_shuffle_v2bf16_v8bf16__2_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__2_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__2_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -23081,21 +23081,21 @@ define void @s_shuffle_v2bf16_v8bf16__3_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__3_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__3_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -23134,20 +23134,20 @@ define void @s_shuffle_v2bf16_v8bf16__4_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__4_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__4_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -23188,21 +23188,21 @@ define void @s_shuffle_v2bf16_v8bf16__5_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__5_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__5_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -23241,20 +23241,20 @@ define void @s_shuffle_v2bf16_v8bf16__6_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__6_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__6_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -23295,21 +23295,21 @@ define void @s_shuffle_v2bf16_v8bf16__7_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__7_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__7_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -23342,17 +23342,17 @@ define void @s_shuffle_v2bf16_v8bf16__8_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__8_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__8_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -23387,18 +23387,18 @@ define void @s_shuffle_v2bf16_v8bf16__9_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__9_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__9_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -23431,17 +23431,17 @@ define void @s_shuffle_v2bf16_v8bf16__10_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__10_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__10_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -23476,18 +23476,18 @@ define void @s_shuffle_v2bf16_v8bf16__11_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__11_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__11_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -23520,17 +23520,17 @@ define void @s_shuffle_v2bf16_v8bf16__12_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__12_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__12_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -23565,18 +23565,18 @@ define void @s_shuffle_v2bf16_v8bf16__13_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__13_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__13_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -23609,17 +23609,17 @@ define void @s_shuffle_v2bf16_v8bf16__14_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__14_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__14_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -23652,17 +23652,17 @@ define void @s_shuffle_v2bf16_v8bf16__u_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__u_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__u_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -23703,21 +23703,21 @@ define void @s_shuffle_v2bf16_v8bf16__0_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__0_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s5, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__0_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s5, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -23760,22 +23760,22 @@ define void @s_shuffle_v2bf16_v8bf16__1_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__1_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s5, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__1_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s5, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -23816,21 +23816,21 @@ define void @s_shuffle_v2bf16_v8bf16__2_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__2_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s5, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__2_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s5, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -23873,22 +23873,22 @@ define void @s_shuffle_v2bf16_v8bf16__3_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__3_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s5, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__3_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s5, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -23929,21 +23929,21 @@ define void @s_shuffle_v2bf16_v8bf16__4_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__4_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s5, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__4_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s5, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -23986,22 +23986,22 @@ define void @s_shuffle_v2bf16_v8bf16__5_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__5_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s5, 16 -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__5_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s5, 16 +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -24042,21 +24042,21 @@ define void @s_shuffle_v2bf16_v8bf16__6_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__6_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s5, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__6_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s5, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -24099,22 +24099,22 @@ define void @s_shuffle_v2bf16_v8bf16__7_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__7_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s5, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__7_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s5, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -24149,18 +24149,18 @@ define void @s_shuffle_v2bf16_v8bf16__8_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__8_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__8_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -24197,19 +24197,19 @@ define void @s_shuffle_v2bf16_v8bf16__9_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__9_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__9_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -24242,17 +24242,17 @@ define void @s_shuffle_v2bf16_v8bf16__10_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__10_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__10_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -24287,18 +24287,18 @@ define void @s_shuffle_v2bf16_v8bf16__11_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__11_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__11_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -24333,18 +24333,18 @@ define void @s_shuffle_v2bf16_v8bf16__12_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__12_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__12_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -24381,19 +24381,19 @@ define void @s_shuffle_v2bf16_v8bf16__13_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__13_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__13_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -24428,18 +24428,18 @@ define void @s_shuffle_v2bf16_v8bf16__14_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__14_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__14_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -24472,17 +24472,17 @@ define void @s_shuffle_v2bf16_v8bf16__u_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__u_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__u_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -24521,20 +24521,20 @@ define void @s_shuffle_v2bf16_v8bf16__0_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__0_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__0_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -24575,21 +24575,21 @@ define void @s_shuffle_v2bf16_v8bf16__1_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__1_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__1_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -24628,20 +24628,20 @@ define void @s_shuffle_v2bf16_v8bf16__2_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__2_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__2_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -24682,21 +24682,21 @@ define void @s_shuffle_v2bf16_v8bf16__3_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__3_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__3_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -24735,20 +24735,20 @@ define void @s_shuffle_v2bf16_v8bf16__4_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__4_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__4_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -24789,21 +24789,21 @@ define void @s_shuffle_v2bf16_v8bf16__5_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__5_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__5_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -24842,20 +24842,20 @@ define void @s_shuffle_v2bf16_v8bf16__6_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__6_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__6_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -24896,21 +24896,21 @@ define void @s_shuffle_v2bf16_v8bf16__7_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__7_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__7_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -24943,17 +24943,17 @@ define void @s_shuffle_v2bf16_v8bf16__8_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__8_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__8_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -24988,18 +24988,18 @@ define void @s_shuffle_v2bf16_v8bf16__9_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__9_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__9_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -25032,17 +25032,17 @@ define void @s_shuffle_v2bf16_v8bf16__10_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__10_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__10_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -25077,18 +25077,18 @@ define void @s_shuffle_v2bf16_v8bf16__11_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__11_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__11_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -25121,17 +25121,17 @@ define void @s_shuffle_v2bf16_v8bf16__12_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__12_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__12_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -25166,18 +25166,18 @@ define void @s_shuffle_v2bf16_v8bf16__13_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__13_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__13_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -25210,17 +25210,17 @@ define void @s_shuffle_v2bf16_v8bf16__14_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__14_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__14_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -25253,17 +25253,17 @@ define void @s_shuffle_v2bf16_v8bf16__u_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__u_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__u_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -25304,21 +25304,21 @@ define void @s_shuffle_v2bf16_v8bf16__0_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__0_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s6, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__0_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s6, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -25361,22 +25361,22 @@ define void @s_shuffle_v2bf16_v8bf16__1_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__1_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s6, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__1_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s6, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -25417,21 +25417,21 @@ define void @s_shuffle_v2bf16_v8bf16__2_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__2_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s6, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__2_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s6, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -25474,22 +25474,22 @@ define void @s_shuffle_v2bf16_v8bf16__3_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__3_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s6, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__3_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s6, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -25530,21 +25530,21 @@ define void @s_shuffle_v2bf16_v8bf16__4_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__4_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s6, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__4_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s6, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -25587,22 +25587,22 @@ define void @s_shuffle_v2bf16_v8bf16__5_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__5_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s6, 16 -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__5_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s6, 16 +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -25643,21 +25643,21 @@ define void @s_shuffle_v2bf16_v8bf16__6_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__6_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s6, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__6_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s6, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -25700,22 +25700,22 @@ define void @s_shuffle_v2bf16_v8bf16__7_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__7_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s6, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__7_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s6, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -25750,18 +25750,18 @@ define void @s_shuffle_v2bf16_v8bf16__8_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__8_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__8_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -25798,19 +25798,19 @@ define void @s_shuffle_v2bf16_v8bf16__9_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__9_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__9_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -25845,18 +25845,18 @@ define void @s_shuffle_v2bf16_v8bf16__10_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__10_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__10_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -25893,19 +25893,19 @@ define void @s_shuffle_v2bf16_v8bf16__11_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__11_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__11_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -25938,17 +25938,17 @@ define void @s_shuffle_v2bf16_v8bf16__12_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__12_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__12_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -25983,18 +25983,18 @@ define void @s_shuffle_v2bf16_v8bf16__13_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__13_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__13_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -26029,18 +26029,18 @@ define void @s_shuffle_v2bf16_v8bf16__14_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__14_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__14_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -26073,17 +26073,17 @@ define void @s_shuffle_v2bf16_v8bf16__u_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__u_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__u_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -26122,20 +26122,20 @@ define void @s_shuffle_v2bf16_v8bf16__0_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__0_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__0_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -26176,21 +26176,21 @@ define void @s_shuffle_v2bf16_v8bf16__1_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__1_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__1_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -26229,20 +26229,20 @@ define void @s_shuffle_v2bf16_v8bf16__2_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__2_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__2_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -26283,21 +26283,21 @@ define void @s_shuffle_v2bf16_v8bf16__3_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__3_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__3_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -26336,20 +26336,20 @@ define void @s_shuffle_v2bf16_v8bf16__4_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__4_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__4_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -26390,21 +26390,21 @@ define void @s_shuffle_v2bf16_v8bf16__5_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__5_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__5_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -26443,20 +26443,20 @@ define void @s_shuffle_v2bf16_v8bf16__6_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__6_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__6_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -26497,21 +26497,21 @@ define void @s_shuffle_v2bf16_v8bf16__7_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__7_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__7_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -26544,17 +26544,17 @@ define void @s_shuffle_v2bf16_v8bf16__8_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__8_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__8_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -26589,18 +26589,18 @@ define void @s_shuffle_v2bf16_v8bf16__9_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__9_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__9_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -26633,17 +26633,17 @@ define void @s_shuffle_v2bf16_v8bf16__10_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__10_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__10_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -26678,18 +26678,18 @@ define void @s_shuffle_v2bf16_v8bf16__11_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__11_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__11_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -26722,17 +26722,17 @@ define void @s_shuffle_v2bf16_v8bf16__12_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__12_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__12_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -26767,18 +26767,18 @@ define void @s_shuffle_v2bf16_v8bf16__13_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__13_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__13_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -26811,17 +26811,17 @@ define void @s_shuffle_v2bf16_v8bf16__14_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__14_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__14_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -26854,17 +26854,17 @@ define void @s_shuffle_v2bf16_v8bf16__u_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__u_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__u_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -26905,21 +26905,21 @@ define void @s_shuffle_v2bf16_v8bf16__0_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__0_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s7, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__0_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s7, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -26962,22 +26962,22 @@ define void @s_shuffle_v2bf16_v8bf16__1_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__1_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s7, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__1_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s7, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -27018,21 +27018,21 @@ define void @s_shuffle_v2bf16_v8bf16__2_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__2_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s7, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__2_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s7, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -27075,22 +27075,22 @@ define void @s_shuffle_v2bf16_v8bf16__3_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__3_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s7, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__3_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s7, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -27131,21 +27131,21 @@ define void @s_shuffle_v2bf16_v8bf16__4_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__4_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s7, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__4_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s7, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -27188,22 +27188,22 @@ define void @s_shuffle_v2bf16_v8bf16__5_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__5_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s7, 16 -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__5_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s7, 16 +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -27244,21 +27244,21 @@ define void @s_shuffle_v2bf16_v8bf16__6_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__6_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s7, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__6_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s7, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -27301,22 +27301,22 @@ define void @s_shuffle_v2bf16_v8bf16__7_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__7_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s7, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__7_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s7, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -27351,18 +27351,18 @@ define void @s_shuffle_v2bf16_v8bf16__8_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__8_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__8_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -27399,19 +27399,19 @@ define void @s_shuffle_v2bf16_v8bf16__9_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__9_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__9_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -27446,18 +27446,18 @@ define void @s_shuffle_v2bf16_v8bf16__10_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__10_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__10_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -27494,19 +27494,19 @@ define void @s_shuffle_v2bf16_v8bf16__11_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__11_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__11_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -27541,18 +27541,18 @@ define void @s_shuffle_v2bf16_v8bf16__12_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__12_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__12_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -27589,19 +27589,19 @@ define void @s_shuffle_v2bf16_v8bf16__13_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__13_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__13_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> @@ -27634,17 +27634,17 @@ define void @s_shuffle_v2bf16_v8bf16__14_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2bf16_v8bf16__14_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2bf16_v8bf16__14_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x bfloat> asm "; def $0", "=s"() %vec1 = call <8 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <8 x bfloat> %vec0, <8 x bfloat> %vec1, <2 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2f16.v2f16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2f16.v2f16.ll index c6324cb962133..f8d0944ed8ecd 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2f16.v2f16.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2f16.v2f16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v2f16_v2f16__u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v2f16_v2f16__0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v2f16__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v2f16__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -79,17 +79,17 @@ define void @v_shuffle_v2f16_v2f16__1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v2f16__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v2f16__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v1, s0, v1, 16 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -132,17 +132,17 @@ define void @v_shuffle_v2f16_v2f16__3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v2f16__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v2f16__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v1, s0, v1, 16 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <2 x i32> @@ -181,21 +181,21 @@ define void @v_shuffle_v2f16_v2f16__3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v2f16__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v2, 16 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v2f16__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v2, 16 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <2 x i32> @@ -236,22 +236,22 @@ define void @v_shuffle_v2f16_v2f16__3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v2f16__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v2, s2 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v2f16__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v2, s2 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <2 x i32> @@ -284,17 +284,17 @@ define void @v_shuffle_v2f16_v2f16__3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v2f16__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v2f16__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v1, 16 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <2 x i32> @@ -329,18 +329,18 @@ define void @v_shuffle_v2f16_v2f16__3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v2f16__3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v2f16__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <2 x i32> @@ -373,17 +373,17 @@ define void @v_shuffle_v2f16_v2f16__u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v2f16__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v2f16__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -417,18 +417,18 @@ define void @v_shuffle_v2f16_v2f16__0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v2f16__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v2f16__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <2 x i32> zeroinitializer store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -460,17 +460,17 @@ define void @v_shuffle_v2f16_v2f16__1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v2f16__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v2f16__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v1, 16 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -502,17 +502,17 @@ define void @v_shuffle_v2f16_v2f16__2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v2f16__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v2f16__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -542,16 +542,16 @@ define void @v_shuffle_v2f16_v2f16__u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v2f16__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v2f16__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -581,16 +581,16 @@ define void @v_shuffle_v2f16_v2f16__0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v2f16__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v2f16__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -624,18 +624,18 @@ define void @v_shuffle_v2f16_v2f16__1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v2f16__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v2f16__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -665,16 +665,16 @@ define void @v_shuffle_v2f16_v2f16__2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v2f16__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v2f16__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -715,16 +715,16 @@ define void @v_shuffle_v2f16_v2f16__0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v2f16__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v2f16__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -756,17 +756,17 @@ define void @v_shuffle_v2f16_v2f16__1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v2f16__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v2f16__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v1, s0, v1, 16 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -807,16 +807,16 @@ define void @v_shuffle_v2f16_v2f16__u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v2f16__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v2f16__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <2 x i32> @@ -857,22 +857,22 @@ define void @v_shuffle_v2f16_v2f16__0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v2f16__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v2 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v2f16__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v1, v2 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <2 x i32> @@ -913,22 +913,22 @@ define void @v_shuffle_v2f16_v2f16__1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v2f16__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v1, s2 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v2f16__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v1, s2 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <2 x i32> @@ -959,16 +959,16 @@ define void @v_shuffle_v2f16_v2f16__2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v2f16__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v2f16__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <2 x i32> @@ -1013,17 +1013,17 @@ define void @s_shuffle_v2f16_v2f16__0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v2f16__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v2f16__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -1055,17 +1055,17 @@ define void @s_shuffle_v2f16_v2f16__1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v2f16__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v2f16__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -1111,17 +1111,17 @@ define void @s_shuffle_v2f16_v2f16__3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v2f16__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v2f16__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <2 x i32> @@ -1162,21 +1162,21 @@ define void @s_shuffle_v2f16_v2f16__3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v2f16__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v2f16__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <2 x i32> @@ -1219,22 +1219,22 @@ define void @s_shuffle_v2f16_v2f16__3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v2f16__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v2f16__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <2 x i32> @@ -1269,18 +1269,18 @@ define void @s_shuffle_v2f16_v2f16__3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v2f16__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v2f16__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <2 x i32> @@ -1315,18 +1315,18 @@ define void @s_shuffle_v2f16_v2f16__3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v2f16__3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v2f16__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <2 x i32> @@ -1359,17 +1359,17 @@ define void @s_shuffle_v2f16_v2f16__u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v2f16__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v2f16__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -1401,17 +1401,17 @@ define void @s_shuffle_v2f16_v2f16__0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v2f16__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v2f16__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <2 x i32> zeroinitializer call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -1445,18 +1445,18 @@ define void @s_shuffle_v2f16_v2f16__1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v2f16__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v2f16__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -1488,17 +1488,17 @@ define void @s_shuffle_v2f16_v2f16__2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v2f16__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v2f16__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -1528,17 +1528,17 @@ define void @s_shuffle_v2f16_v2f16__u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v2f16__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v2f16__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -1568,17 +1568,17 @@ define void @s_shuffle_v2f16_v2f16__0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v2f16__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v2f16__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -1612,18 +1612,18 @@ define void @s_shuffle_v2f16_v2f16__1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v2f16__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v2f16__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -1653,17 +1653,17 @@ define void @s_shuffle_v2f16_v2f16__2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v2f16__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v2f16__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -1707,17 +1707,17 @@ define void @s_shuffle_v2f16_v2f16__0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v2f16__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v2f16__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -1749,17 +1749,17 @@ define void @s_shuffle_v2f16_v2f16__1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v2f16__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v2f16__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -1803,17 +1803,17 @@ define void @s_shuffle_v2f16_v2f16__u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v2f16__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v2f16__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <2 x i32> @@ -1854,21 +1854,21 @@ define void @s_shuffle_v2f16_v2f16__0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v2f16__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v2f16__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <2 x i32> @@ -1911,22 +1911,22 @@ define void @s_shuffle_v2f16_v2f16__1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v2f16__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v2f16__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <2 x i32> @@ -1957,17 +1957,17 @@ define void @s_shuffle_v2f16_v2f16__2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v2f16__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v2f16__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <2 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2f16.v3f16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2f16.v3f16.ll index 9d27b5445598c..60d3b4a681dad 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2f16.v3f16.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2f16.v3f16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v2f16_v3f16__u_u(ptr addrspace(1) inreg %ptr) { @@ -39,16 +39,16 @@ define void @v_shuffle_v2f16_v3f16__0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -81,17 +81,17 @@ define void @v_shuffle_v2f16_v3f16__1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -122,16 +122,16 @@ define void @v_shuffle_v2f16_v3f16__2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -176,17 +176,17 @@ define void @v_shuffle_v2f16_v3f16__4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -219,16 +219,16 @@ define void @v_shuffle_v2f16_v3f16__5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -271,22 +271,22 @@ define void @v_shuffle_v2f16_v3f16__5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -329,22 +329,22 @@ define void @v_shuffle_v2f16_v3f16__5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -387,22 +387,22 @@ define void @v_shuffle_v2f16_v3f16__5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -439,18 +439,18 @@ define void @v_shuffle_v2f16_v3f16__5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -487,18 +487,18 @@ define void @v_shuffle_v2f16_v3f16__5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v0 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -535,18 +535,18 @@ define void @v_shuffle_v2f16_v3f16__5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -581,17 +581,17 @@ define void @v_shuffle_v2f16_v3f16__u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -626,18 +626,18 @@ define void @v_shuffle_v2f16_v3f16__0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> zeroinitializer @@ -670,17 +670,17 @@ define void @v_shuffle_v2f16_v3f16__1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -715,18 +715,18 @@ define void @v_shuffle_v2f16_v3f16__2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -759,17 +759,17 @@ define void @v_shuffle_v2f16_v3f16__3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -808,21 +808,21 @@ define void @v_shuffle_v2f16_v3f16__4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -855,16 +855,16 @@ define void @v_shuffle_v2f16_v3f16__u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -895,16 +895,16 @@ define void @v_shuffle_v2f16_v3f16__0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -939,18 +939,18 @@ define void @v_shuffle_v2f16_v3f16__1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -985,18 +985,18 @@ define void @v_shuffle_v2f16_v3f16__2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v0 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -1027,16 +1027,16 @@ define void @v_shuffle_v2f16_v3f16__3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -1077,22 +1077,22 @@ define void @v_shuffle_v2f16_v3f16__4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -1127,17 +1127,17 @@ define void @v_shuffle_v2f16_v3f16__u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -1172,18 +1172,18 @@ define void @v_shuffle_v2f16_v3f16__0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -1216,17 +1216,17 @@ define void @v_shuffle_v2f16_v3f16__1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -1261,18 +1261,18 @@ define void @v_shuffle_v2f16_v3f16__2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -1305,17 +1305,17 @@ define void @v_shuffle_v2f16_v3f16__3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -1354,21 +1354,21 @@ define void @v_shuffle_v2f16_v3f16__4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -1413,16 +1413,16 @@ define void @v_shuffle_v2f16_v3f16__0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -1455,17 +1455,17 @@ define void @v_shuffle_v2f16_v3f16__1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -1496,16 +1496,16 @@ define void @v_shuffle_v2f16_v3f16__2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -1550,17 +1550,17 @@ define void @v_shuffle_v2f16_v3f16__4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -1593,16 +1593,16 @@ define void @v_shuffle_v2f16_v3f16__u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -1645,22 +1645,22 @@ define void @v_shuffle_v2f16_v3f16__0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -1703,22 +1703,22 @@ define void @v_shuffle_v2f16_v3f16__1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -1761,22 +1761,22 @@ define void @v_shuffle_v2f16_v3f16__2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -1809,16 +1809,16 @@ define void @v_shuffle_v2f16_v3f16__3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -1855,18 +1855,18 @@ define void @v_shuffle_v2f16_v3f16__4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -1901,17 +1901,17 @@ define void @v_shuffle_v2f16_v3f16__u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -1954,22 +1954,22 @@ define void @v_shuffle_v2f16_v3f16__0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -2010,21 +2010,21 @@ define void @v_shuffle_v2f16_v3f16__1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -2067,22 +2067,22 @@ define void @v_shuffle_v2f16_v3f16__2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -2119,18 +2119,18 @@ define void @v_shuffle_v2f16_v3f16__3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -2165,17 +2165,17 @@ define void @v_shuffle_v2f16_v3f16__4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v3f16__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v3f16__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -2223,17 +2223,17 @@ define void @s_shuffle_v2f16_v3f16__0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -2266,17 +2266,17 @@ define void @s_shuffle_v2f16_v3f16__1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -2309,17 +2309,17 @@ define void @s_shuffle_v2f16_v3f16__2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -2367,17 +2367,17 @@ define void @s_shuffle_v2f16_v3f16__4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -2412,17 +2412,17 @@ define void @s_shuffle_v2f16_v3f16__5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -2463,20 +2463,20 @@ define void @s_shuffle_v2f16_v3f16__5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -2519,21 +2519,21 @@ define void @s_shuffle_v2f16_v3f16__5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -2574,20 +2574,20 @@ define void @s_shuffle_v2f16_v3f16__5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -2622,17 +2622,17 @@ define void @s_shuffle_v2f16_v3f16__5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -2669,18 +2669,18 @@ define void @s_shuffle_v2f16_v3f16__5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -2715,17 +2715,17 @@ define void @s_shuffle_v2f16_v3f16__5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -2760,17 +2760,17 @@ define void @s_shuffle_v2f16_v3f16__u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -2803,17 +2803,17 @@ define void @s_shuffle_v2f16_v3f16__0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> zeroinitializer @@ -2848,18 +2848,18 @@ define void @s_shuffle_v2f16_v3f16__1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -2892,17 +2892,17 @@ define void @s_shuffle_v2f16_v3f16__2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -2935,17 +2935,17 @@ define void @s_shuffle_v2f16_v3f16__3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -2986,21 +2986,21 @@ define void @s_shuffle_v2f16_v3f16__4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -3033,17 +3033,17 @@ define void @s_shuffle_v2f16_v3f16__u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -3074,17 +3074,17 @@ define void @s_shuffle_v2f16_v3f16__0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -3119,18 +3119,18 @@ define void @s_shuffle_v2f16_v3f16__1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -3165,18 +3165,18 @@ define void @s_shuffle_v2f16_v3f16__2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -3207,17 +3207,17 @@ define void @s_shuffle_v2f16_v3f16__3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -3260,22 +3260,22 @@ define void @s_shuffle_v2f16_v3f16__4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -3310,17 +3310,17 @@ define void @s_shuffle_v2f16_v3f16__u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -3353,17 +3353,17 @@ define void @s_shuffle_v2f16_v3f16__0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -3398,18 +3398,18 @@ define void @s_shuffle_v2f16_v3f16__1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -3442,17 +3442,17 @@ define void @s_shuffle_v2f16_v3f16__2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -3485,17 +3485,17 @@ define void @s_shuffle_v2f16_v3f16__3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -3536,21 +3536,21 @@ define void @s_shuffle_v2f16_v3f16__4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -3598,17 +3598,17 @@ define void @s_shuffle_v2f16_v3f16__0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -3641,17 +3641,17 @@ define void @s_shuffle_v2f16_v3f16__1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -3684,17 +3684,17 @@ define void @s_shuffle_v2f16_v3f16__2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> @@ -3744,18 +3744,18 @@ define void @s_shuffle_v2f16_v3f16__4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -3788,17 +3788,17 @@ define void @s_shuffle_v2f16_v3f16__u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -3841,21 +3841,21 @@ define void @s_shuffle_v2f16_v3f16__0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -3900,22 +3900,22 @@ define void @s_shuffle_v2f16_v3f16__1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -3958,21 +3958,21 @@ define void @s_shuffle_v2f16_v3f16__2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4005,17 +4005,17 @@ define void @s_shuffle_v2f16_v3f16__3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4052,18 +4052,18 @@ define void @s_shuffle_v2f16_v3f16__4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4098,17 +4098,17 @@ define void @s_shuffle_v2f16_v3f16__u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4149,20 +4149,20 @@ define void @s_shuffle_v2f16_v3f16__0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4205,21 +4205,21 @@ define void @s_shuffle_v2f16_v3f16__1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4260,20 +4260,20 @@ define void @s_shuffle_v2f16_v3f16__2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4308,17 +4308,17 @@ define void @s_shuffle_v2f16_v3f16__3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4355,18 +4355,18 @@ define void @s_shuffle_v2f16_v3f16__4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v3f16__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v3f16__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2f16.v4f16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2f16.v4f16.ll index 42dc9d357d51c..21c1e198d9ae7 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2f16.v4f16.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2f16.v4f16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v2f16_v4f16__u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v2f16_v4f16__0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -79,17 +79,17 @@ define void @v_shuffle_v2f16_v4f16__1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -119,16 +119,16 @@ define void @v_shuffle_v2f16_v4f16__2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -160,17 +160,17 @@ define void @v_shuffle_v2f16_v4f16__3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -213,17 +213,17 @@ define void @v_shuffle_v2f16_v4f16__5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -254,16 +254,16 @@ define void @v_shuffle_v2f16_v4f16__6_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -296,17 +296,17 @@ define void @v_shuffle_v2f16_v4f16__7_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -345,21 +345,21 @@ define void @v_shuffle_v2f16_v4f16__7_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -400,22 +400,22 @@ define void @v_shuffle_v2f16_v4f16__7_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -454,21 +454,21 @@ define void @v_shuffle_v2f16_v4f16__7_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -509,22 +509,22 @@ define void @v_shuffle_v2f16_v4f16__7_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -557,17 +557,17 @@ define void @v_shuffle_v2f16_v4f16__7_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v1, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -602,18 +602,18 @@ define void @v_shuffle_v2f16_v4f16__7_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -646,17 +646,17 @@ define void @v_shuffle_v2f16_v4f16__7_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v1, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -691,18 +691,18 @@ define void @v_shuffle_v2f16_v4f16__7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -735,17 +735,17 @@ define void @v_shuffle_v2f16_v4f16__u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -779,18 +779,18 @@ define void @v_shuffle_v2f16_v4f16__0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> zeroinitializer store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -822,17 +822,17 @@ define void @v_shuffle_v2f16_v4f16__1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -866,18 +866,18 @@ define void @v_shuffle_v2f16_v4f16__2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -909,17 +909,17 @@ define void @v_shuffle_v2f16_v4f16__3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v1, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -951,17 +951,17 @@ define void @v_shuffle_v2f16_v4f16__4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -999,21 +999,21 @@ define void @v_shuffle_v2f16_v4f16__5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -1054,22 +1054,22 @@ define void @v_shuffle_v2f16_v4f16__6_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -1100,16 +1100,16 @@ define void @v_shuffle_v2f16_v4f16__u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1139,16 +1139,16 @@ define void @v_shuffle_v2f16_v4f16__0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1182,18 +1182,18 @@ define void @v_shuffle_v2f16_v4f16__1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1227,18 +1227,18 @@ define void @v_shuffle_v2f16_v4f16__2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v0 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1272,18 +1272,18 @@ define void @v_shuffle_v2f16_v4f16__3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1313,16 +1313,16 @@ define void @v_shuffle_v2f16_v4f16__4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1362,22 +1362,22 @@ define void @v_shuffle_v2f16_v4f16__5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -1418,22 +1418,22 @@ define void @v_shuffle_v2f16_v4f16__6_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -1466,17 +1466,17 @@ define void @v_shuffle_v2f16_v4f16__u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1510,18 +1510,18 @@ define void @v_shuffle_v2f16_v4f16__0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1553,17 +1553,17 @@ define void @v_shuffle_v2f16_v4f16__1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1597,18 +1597,18 @@ define void @v_shuffle_v2f16_v4f16__2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1640,17 +1640,17 @@ define void @v_shuffle_v2f16_v4f16__3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v1, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1682,17 +1682,17 @@ define void @v_shuffle_v2f16_v4f16__4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1730,21 +1730,21 @@ define void @v_shuffle_v2f16_v4f16__5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -1785,22 +1785,22 @@ define void @v_shuffle_v2f16_v4f16__6_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -1831,16 +1831,16 @@ define void @v_shuffle_v2f16_v4f16__u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1874,18 +1874,18 @@ define void @v_shuffle_v2f16_v4f16__0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1919,18 +1919,18 @@ define void @v_shuffle_v2f16_v4f16__1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1960,16 +1960,16 @@ define void @v_shuffle_v2f16_v4f16__2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2003,18 +2003,18 @@ define void @v_shuffle_v2f16_v4f16__3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2044,16 +2044,16 @@ define void @v_shuffle_v2f16_v4f16__4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2093,22 +2093,22 @@ define void @v_shuffle_v2f16_v4f16__5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -2149,22 +2149,22 @@ define void @v_shuffle_v2f16_v4f16__6_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v1 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -2206,16 +2206,16 @@ define void @v_shuffle_v2f16_v4f16__0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2247,17 +2247,17 @@ define void @v_shuffle_v2f16_v4f16__1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2287,16 +2287,16 @@ define void @v_shuffle_v2f16_v4f16__2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2328,17 +2328,17 @@ define void @v_shuffle_v2f16_v4f16__3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2381,17 +2381,17 @@ define void @v_shuffle_v2f16_v4f16__5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -2426,18 +2426,18 @@ define void @v_shuffle_v2f16_v4f16__6_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -2468,16 +2468,16 @@ define void @v_shuffle_v2f16_v4f16__u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -2518,22 +2518,22 @@ define void @v_shuffle_v2f16_v4f16__0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -2574,22 +2574,22 @@ define void @v_shuffle_v2f16_v4f16__1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -2630,22 +2630,22 @@ define void @v_shuffle_v2f16_v4f16__2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -2686,22 +2686,22 @@ define void @v_shuffle_v2f16_v4f16__3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -2732,16 +2732,16 @@ define void @v_shuffle_v2f16_v4f16__4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -2776,18 +2776,18 @@ define void @v_shuffle_v2f16_v4f16__5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -2822,18 +2822,18 @@ define void @v_shuffle_v2f16_v4f16__6_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v0 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -2866,17 +2866,17 @@ define void @v_shuffle_v2f16_v4f16__u_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -2917,22 +2917,22 @@ define void @v_shuffle_v2f16_v4f16__0_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -2971,21 +2971,21 @@ define void @v_shuffle_v2f16_v4f16__1_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -3026,22 +3026,22 @@ define void @v_shuffle_v2f16_v4f16__2_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -3080,21 +3080,21 @@ define void @v_shuffle_v2f16_v4f16__3_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v1, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v1, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -3129,18 +3129,18 @@ define void @v_shuffle_v2f16_v4f16__4_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -3173,17 +3173,17 @@ define void @v_shuffle_v2f16_v4f16__5_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -3218,18 +3218,18 @@ define void @v_shuffle_v2f16_v4f16__6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -3260,16 +3260,16 @@ define void @v_shuffle_v2f16_v4f16__u_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -3310,22 +3310,22 @@ define void @v_shuffle_v2f16_v4f16__0_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -3366,22 +3366,22 @@ define void @v_shuffle_v2f16_v4f16__1_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -3422,22 +3422,22 @@ define void @v_shuffle_v2f16_v4f16__2_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -3478,22 +3478,22 @@ define void @v_shuffle_v2f16_v4f16__3_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -3528,18 +3528,18 @@ define void @v_shuffle_v2f16_v4f16__4_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -3574,18 +3574,18 @@ define void @v_shuffle_v2f16_v4f16__5_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -3616,16 +3616,16 @@ define void @v_shuffle_v2f16_v4f16__6_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v4f16__6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v4f16__6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -3670,17 +3670,17 @@ define void @s_shuffle_v2f16_v4f16__0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -3712,17 +3712,17 @@ define void @s_shuffle_v2f16_v4f16__1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -3754,17 +3754,17 @@ define void @s_shuffle_v2f16_v4f16__2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -3796,17 +3796,17 @@ define void @s_shuffle_v2f16_v4f16__3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -3852,17 +3852,17 @@ define void @s_shuffle_v2f16_v4f16__5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -3895,17 +3895,17 @@ define void @s_shuffle_v2f16_v4f16__6_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -3938,17 +3938,17 @@ define void @s_shuffle_v2f16_v4f16__7_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -3989,21 +3989,21 @@ define void @s_shuffle_v2f16_v4f16__7_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -4046,22 +4046,22 @@ define void @s_shuffle_v2f16_v4f16__7_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -4102,21 +4102,21 @@ define void @s_shuffle_v2f16_v4f16__7_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -4159,22 +4159,22 @@ define void @s_shuffle_v2f16_v4f16__7_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -4209,18 +4209,18 @@ define void @s_shuffle_v2f16_v4f16__7_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -4257,19 +4257,19 @@ define void @s_shuffle_v2f16_v4f16__7_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -4304,18 +4304,18 @@ define void @s_shuffle_v2f16_v4f16__7_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -4350,18 +4350,18 @@ define void @s_shuffle_v2f16_v4f16__7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -4394,17 +4394,17 @@ define void @s_shuffle_v2f16_v4f16__u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -4436,17 +4436,17 @@ define void @s_shuffle_v2f16_v4f16__0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> zeroinitializer call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -4480,18 +4480,18 @@ define void @s_shuffle_v2f16_v4f16__1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -4523,17 +4523,17 @@ define void @s_shuffle_v2f16_v4f16__2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -4567,18 +4567,18 @@ define void @s_shuffle_v2f16_v4f16__3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -4610,17 +4610,17 @@ define void @s_shuffle_v2f16_v4f16__4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -4660,21 +4660,21 @@ define void @s_shuffle_v2f16_v4f16__5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -4713,20 +4713,20 @@ define void @s_shuffle_v2f16_v4f16__6_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -4757,17 +4757,17 @@ define void @s_shuffle_v2f16_v4f16__u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -4797,17 +4797,17 @@ define void @s_shuffle_v2f16_v4f16__0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -4841,18 +4841,18 @@ define void @s_shuffle_v2f16_v4f16__1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -4886,18 +4886,18 @@ define void @s_shuffle_v2f16_v4f16__2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -4933,19 +4933,19 @@ define void @s_shuffle_v2f16_v4f16__3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -4975,17 +4975,17 @@ define void @s_shuffle_v2f16_v4f16__4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -5027,22 +5027,22 @@ define void @s_shuffle_v2f16_v4f16__5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -5083,21 +5083,21 @@ define void @s_shuffle_v2f16_v4f16__6_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -5130,17 +5130,17 @@ define void @s_shuffle_v2f16_v4f16__u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -5172,17 +5172,17 @@ define void @s_shuffle_v2f16_v4f16__0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -5216,18 +5216,18 @@ define void @s_shuffle_v2f16_v4f16__1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -5259,17 +5259,17 @@ define void @s_shuffle_v2f16_v4f16__2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -5303,18 +5303,18 @@ define void @s_shuffle_v2f16_v4f16__3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -5346,17 +5346,17 @@ define void @s_shuffle_v2f16_v4f16__4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -5396,21 +5396,21 @@ define void @s_shuffle_v2f16_v4f16__5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -5449,20 +5449,20 @@ define void @s_shuffle_v2f16_v4f16__6_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -5495,17 +5495,17 @@ define void @s_shuffle_v2f16_v4f16__u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -5539,18 +5539,18 @@ define void @s_shuffle_v2f16_v4f16__0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -5586,19 +5586,19 @@ define void @s_shuffle_v2f16_v4f16__1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -5630,17 +5630,17 @@ define void @s_shuffle_v2f16_v4f16__2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -5674,18 +5674,18 @@ define void @s_shuffle_v2f16_v4f16__3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -5717,17 +5717,17 @@ define void @s_shuffle_v2f16_v4f16__4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -5769,22 +5769,22 @@ define void @s_shuffle_v2f16_v4f16__5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -5825,21 +5825,21 @@ define void @s_shuffle_v2f16_v4f16__6_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -5884,17 +5884,17 @@ define void @s_shuffle_v2f16_v4f16__0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -5926,17 +5926,17 @@ define void @s_shuffle_v2f16_v4f16__1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -5968,17 +5968,17 @@ define void @s_shuffle_v2f16_v4f16__2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -6010,17 +6010,17 @@ define void @s_shuffle_v2f16_v4f16__3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -6068,18 +6068,18 @@ define void @s_shuffle_v2f16_v4f16__5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -6112,17 +6112,17 @@ define void @s_shuffle_v2f16_v4f16__6_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -6153,17 +6153,17 @@ define void @s_shuffle_v2f16_v4f16__u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -6204,21 +6204,21 @@ define void @s_shuffle_v2f16_v4f16__0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -6261,22 +6261,22 @@ define void @s_shuffle_v2f16_v4f16__1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -6317,21 +6317,21 @@ define void @s_shuffle_v2f16_v4f16__2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -6374,22 +6374,22 @@ define void @s_shuffle_v2f16_v4f16__3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -6420,17 +6420,17 @@ define void @s_shuffle_v2f16_v4f16__4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -6465,18 +6465,18 @@ define void @s_shuffle_v2f16_v4f16__5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -6511,18 +6511,18 @@ define void @s_shuffle_v2f16_v4f16__6_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -6555,17 +6555,17 @@ define void @s_shuffle_v2f16_v4f16__u_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -6604,20 +6604,20 @@ define void @s_shuffle_v2f16_v4f16__0_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -6658,21 +6658,21 @@ define void @s_shuffle_v2f16_v4f16__1_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -6711,20 +6711,20 @@ define void @s_shuffle_v2f16_v4f16__2_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -6765,21 +6765,21 @@ define void @s_shuffle_v2f16_v4f16__3_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -6812,17 +6812,17 @@ define void @s_shuffle_v2f16_v4f16__4_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -6857,18 +6857,18 @@ define void @s_shuffle_v2f16_v4f16__5_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -6901,17 +6901,17 @@ define void @s_shuffle_v2f16_v4f16__6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -6944,17 +6944,17 @@ define void @s_shuffle_v2f16_v4f16__u_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -6995,21 +6995,21 @@ define void @s_shuffle_v2f16_v4f16__0_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -7052,22 +7052,22 @@ define void @s_shuffle_v2f16_v4f16__1_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -7108,21 +7108,21 @@ define void @s_shuffle_v2f16_v4f16__2_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -7165,22 +7165,22 @@ define void @s_shuffle_v2f16_v4f16__3_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -7215,18 +7215,18 @@ define void @s_shuffle_v2f16_v4f16__4_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -7263,19 +7263,19 @@ define void @s_shuffle_v2f16_v4f16__5_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> @@ -7308,17 +7308,17 @@ define void @s_shuffle_v2f16_v4f16__6_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v4f16__6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v4f16__6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <2 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2f16.v8f16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2f16.v8f16.ll index 99f05bbb7e3cc..c0eb441a7a1f2 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2f16.v8f16.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2f16.v8f16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v2f16_v8f16__u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v2f16_v8f16__0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -79,17 +79,17 @@ define void @v_shuffle_v2f16_v8f16__1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -119,16 +119,16 @@ define void @v_shuffle_v2f16_v8f16__2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -160,17 +160,17 @@ define void @v_shuffle_v2f16_v8f16__3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -200,16 +200,16 @@ define void @v_shuffle_v2f16_v8f16__4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -241,17 +241,17 @@ define void @v_shuffle_v2f16_v8f16__5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -281,16 +281,16 @@ define void @v_shuffle_v2f16_v8f16__6_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v3, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -322,17 +322,17 @@ define void @v_shuffle_v2f16_v8f16__7_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -375,17 +375,17 @@ define void @v_shuffle_v2f16_v8f16__9_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__9_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__9_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -416,16 +416,16 @@ define void @v_shuffle_v2f16_v8f16__10_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__10_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__10_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -458,17 +458,17 @@ define void @v_shuffle_v2f16_v8f16__11_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__11_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__11_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -499,16 +499,16 @@ define void @v_shuffle_v2f16_v8f16__12_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__12_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__12_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -541,17 +541,17 @@ define void @v_shuffle_v2f16_v8f16__13_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__13_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__13_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -582,16 +582,16 @@ define void @v_shuffle_v2f16_v8f16__14_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__14_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__14_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v3, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -624,17 +624,17 @@ define void @v_shuffle_v2f16_v8f16__15_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__15_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__15_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -673,21 +673,21 @@ define void @v_shuffle_v2f16_v8f16__15_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__15_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v5, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__15_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v5, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -728,22 +728,22 @@ define void @v_shuffle_v2f16_v8f16__15_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__15_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v5, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__15_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v5, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -782,21 +782,21 @@ define void @v_shuffle_v2f16_v8f16__15_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__15_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v5, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__15_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v5, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -837,22 +837,22 @@ define void @v_shuffle_v2f16_v8f16__15_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__15_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v5, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__15_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v5, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -891,21 +891,21 @@ define void @v_shuffle_v2f16_v8f16__15_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__15_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v7, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__15_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v7, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -946,22 +946,22 @@ define void @v_shuffle_v2f16_v8f16__15_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__15_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v7, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__15_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v7, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -1000,21 +1000,21 @@ define void @v_shuffle_v2f16_v8f16__15_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__15_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v7, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__15_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v7, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -1055,22 +1055,22 @@ define void @v_shuffle_v2f16_v8f16__15_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__15_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v7, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__15_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v7, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -1103,17 +1103,17 @@ define void @v_shuffle_v2f16_v8f16__15_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__15_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__15_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -1148,18 +1148,18 @@ define void @v_shuffle_v2f16_v8f16__15_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__15_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__15_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -1192,17 +1192,17 @@ define void @v_shuffle_v2f16_v8f16__15_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__15_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__15_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -1237,18 +1237,18 @@ define void @v_shuffle_v2f16_v8f16__15_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__15_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__15_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -1281,17 +1281,17 @@ define void @v_shuffle_v2f16_v8f16__15_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__15_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__15_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -1326,18 +1326,18 @@ define void @v_shuffle_v2f16_v8f16__15_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__15_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__15_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -1370,17 +1370,17 @@ define void @v_shuffle_v2f16_v8f16__15_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__15_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__15_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -1415,18 +1415,18 @@ define void @v_shuffle_v2f16_v8f16__15_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__15_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__15_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -1459,17 +1459,17 @@ define void @v_shuffle_v2f16_v8f16__u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1503,18 +1503,18 @@ define void @v_shuffle_v2f16_v8f16__0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> zeroinitializer store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1546,17 +1546,17 @@ define void @v_shuffle_v2f16_v8f16__1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1590,18 +1590,18 @@ define void @v_shuffle_v2f16_v8f16__2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1633,17 +1633,17 @@ define void @v_shuffle_v2f16_v8f16__3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v1, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1677,18 +1677,18 @@ define void @v_shuffle_v2f16_v8f16__4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1720,17 +1720,17 @@ define void @v_shuffle_v2f16_v8f16__5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1764,18 +1764,18 @@ define void @v_shuffle_v2f16_v8f16__6_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1807,17 +1807,17 @@ define void @v_shuffle_v2f16_v8f16__7_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1849,17 +1849,17 @@ define void @v_shuffle_v2f16_v8f16__8_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__8_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__8_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1897,21 +1897,21 @@ define void @v_shuffle_v2f16_v8f16__9_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__9_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v2, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__9_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v2, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -1952,22 +1952,22 @@ define void @v_shuffle_v2f16_v8f16__10_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__10_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__10_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -2006,21 +2006,21 @@ define void @v_shuffle_v2f16_v8f16__11_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__11_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__11_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -2061,22 +2061,22 @@ define void @v_shuffle_v2f16_v8f16__12_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__12_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v4, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__12_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v4, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -2115,21 +2115,21 @@ define void @v_shuffle_v2f16_v8f16__13_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__13_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v4, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__13_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v4, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -2170,22 +2170,22 @@ define void @v_shuffle_v2f16_v8f16__14_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__14_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v5, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__14_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v5, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -2216,16 +2216,16 @@ define void @v_shuffle_v2f16_v8f16__u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2255,16 +2255,16 @@ define void @v_shuffle_v2f16_v8f16__0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2298,18 +2298,18 @@ define void @v_shuffle_v2f16_v8f16__1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2343,18 +2343,18 @@ define void @v_shuffle_v2f16_v8f16__2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v0 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2388,18 +2388,18 @@ define void @v_shuffle_v2f16_v8f16__3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2433,18 +2433,18 @@ define void @v_shuffle_v2f16_v8f16__4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v0 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v2, v0 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2478,18 +2478,18 @@ define void @v_shuffle_v2f16_v8f16__5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2523,18 +2523,18 @@ define void @v_shuffle_v2f16_v8f16__6_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2568,18 +2568,18 @@ define void @v_shuffle_v2f16_v8f16__7_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2609,16 +2609,16 @@ define void @v_shuffle_v2f16_v8f16__8_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__8_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__8_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2658,22 +2658,22 @@ define void @v_shuffle_v2f16_v8f16__9_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__9_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v2, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__9_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v2, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -2714,22 +2714,22 @@ define void @v_shuffle_v2f16_v8f16__10_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__10_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__10_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -2770,22 +2770,22 @@ define void @v_shuffle_v2f16_v8f16__11_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__11_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__11_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -2826,22 +2826,22 @@ define void @v_shuffle_v2f16_v8f16__12_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__12_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v4, v0 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__12_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v4, v0 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -2882,22 +2882,22 @@ define void @v_shuffle_v2f16_v8f16__13_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__13_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v4, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__13_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v4, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -2938,22 +2938,22 @@ define void @v_shuffle_v2f16_v8f16__14_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__14_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v5, v0 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__14_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v5, v0 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -2986,17 +2986,17 @@ define void @v_shuffle_v2f16_v8f16__u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3030,18 +3030,18 @@ define void @v_shuffle_v2f16_v8f16__0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3073,17 +3073,17 @@ define void @v_shuffle_v2f16_v8f16__1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3117,18 +3117,18 @@ define void @v_shuffle_v2f16_v8f16__2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3160,17 +3160,17 @@ define void @v_shuffle_v2f16_v8f16__3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v1, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3204,18 +3204,18 @@ define void @v_shuffle_v2f16_v8f16__4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3247,17 +3247,17 @@ define void @v_shuffle_v2f16_v8f16__5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3291,18 +3291,18 @@ define void @v_shuffle_v2f16_v8f16__6_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3334,17 +3334,17 @@ define void @v_shuffle_v2f16_v8f16__7_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3376,17 +3376,17 @@ define void @v_shuffle_v2f16_v8f16__8_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__8_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__8_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3424,21 +3424,21 @@ define void @v_shuffle_v2f16_v8f16__9_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__9_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v2, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__9_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v2, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -3479,22 +3479,22 @@ define void @v_shuffle_v2f16_v8f16__10_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__10_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__10_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -3533,21 +3533,21 @@ define void @v_shuffle_v2f16_v8f16__11_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__11_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__11_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v3, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -3588,22 +3588,22 @@ define void @v_shuffle_v2f16_v8f16__12_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__12_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v4, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__12_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v4, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -3642,21 +3642,21 @@ define void @v_shuffle_v2f16_v8f16__13_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__13_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v4, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__13_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v4, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -3697,22 +3697,22 @@ define void @v_shuffle_v2f16_v8f16__14_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__14_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v5, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__14_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v5, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -3743,16 +3743,16 @@ define void @v_shuffle_v2f16_v8f16__u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3786,18 +3786,18 @@ define void @v_shuffle_v2f16_v8f16__0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v1 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3831,18 +3831,18 @@ define void @v_shuffle_v2f16_v8f16__1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3872,16 +3872,16 @@ define void @v_shuffle_v2f16_v8f16__2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3915,18 +3915,18 @@ define void @v_shuffle_v2f16_v8f16__3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3960,18 +3960,18 @@ define void @v_shuffle_v2f16_v8f16__4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v2, v1 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4005,18 +4005,18 @@ define void @v_shuffle_v2f16_v8f16__5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4050,18 +4050,18 @@ define void @v_shuffle_v2f16_v8f16__6_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v1 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4095,18 +4095,18 @@ define void @v_shuffle_v2f16_v8f16__7_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4136,16 +4136,16 @@ define void @v_shuffle_v2f16_v8f16__8_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__8_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__8_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4185,22 +4185,22 @@ define void @v_shuffle_v2f16_v8f16__9_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__9_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v2, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__9_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v2, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -4241,22 +4241,22 @@ define void @v_shuffle_v2f16_v8f16__10_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__10_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v1 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__10_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v1 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -4297,22 +4297,22 @@ define void @v_shuffle_v2f16_v8f16__11_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__11_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__11_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -4353,22 +4353,22 @@ define void @v_shuffle_v2f16_v8f16__12_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__12_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v4, v1 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__12_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v4, v1 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -4409,22 +4409,22 @@ define void @v_shuffle_v2f16_v8f16__13_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__13_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v4, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__13_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v4, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -4465,22 +4465,22 @@ define void @v_shuffle_v2f16_v8f16__14_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__14_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v5, v1 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__14_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v5, v1 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -4513,17 +4513,17 @@ define void @v_shuffle_v2f16_v8f16__u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4557,18 +4557,18 @@ define void @v_shuffle_v2f16_v8f16__0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4600,17 +4600,17 @@ define void @v_shuffle_v2f16_v8f16__1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4644,18 +4644,18 @@ define void @v_shuffle_v2f16_v8f16__2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4687,17 +4687,17 @@ define void @v_shuffle_v2f16_v8f16__3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v1, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v1, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4731,18 +4731,18 @@ define void @v_shuffle_v2f16_v8f16__4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4774,17 +4774,17 @@ define void @v_shuffle_v2f16_v8f16__5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4818,18 +4818,18 @@ define void @v_shuffle_v2f16_v8f16__6_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4861,17 +4861,17 @@ define void @v_shuffle_v2f16_v8f16__7_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4903,17 +4903,17 @@ define void @v_shuffle_v2f16_v8f16__8_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__8_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__8_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4951,21 +4951,21 @@ define void @v_shuffle_v2f16_v8f16__9_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__9_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v4, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__9_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v4, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -5006,22 +5006,22 @@ define void @v_shuffle_v2f16_v8f16__10_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__10_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v5, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__10_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v5, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -5060,21 +5060,21 @@ define void @v_shuffle_v2f16_v8f16__11_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__11_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v5, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__11_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v5, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -5115,22 +5115,22 @@ define void @v_shuffle_v2f16_v8f16__12_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__12_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v6, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__12_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v6, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -5169,21 +5169,21 @@ define void @v_shuffle_v2f16_v8f16__13_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__13_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v6, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__13_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v6, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -5224,22 +5224,22 @@ define void @v_shuffle_v2f16_v8f16__14_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__14_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v7, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__14_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v7, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -5270,16 +5270,16 @@ define void @v_shuffle_v2f16_v8f16__u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -5313,18 +5313,18 @@ define void @v_shuffle_v2f16_v8f16__0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -5358,18 +5358,18 @@ define void @v_shuffle_v2f16_v8f16__1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -5403,18 +5403,18 @@ define void @v_shuffle_v2f16_v8f16__2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -5448,18 +5448,18 @@ define void @v_shuffle_v2f16_v8f16__3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -5489,16 +5489,16 @@ define void @v_shuffle_v2f16_v8f16__4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -5532,18 +5532,18 @@ define void @v_shuffle_v2f16_v8f16__5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -5577,18 +5577,18 @@ define void @v_shuffle_v2f16_v8f16__6_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -5622,18 +5622,18 @@ define void @v_shuffle_v2f16_v8f16__7_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -5663,16 +5663,16 @@ define void @v_shuffle_v2f16_v8f16__8_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__8_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__8_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -5712,22 +5712,22 @@ define void @v_shuffle_v2f16_v8f16__9_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__9_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v4, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__9_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v4, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -5768,22 +5768,22 @@ define void @v_shuffle_v2f16_v8f16__10_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__10_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v5, v2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__10_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v5, v2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -5824,22 +5824,22 @@ define void @v_shuffle_v2f16_v8f16__11_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__11_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v5, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__11_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v5, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -5880,22 +5880,22 @@ define void @v_shuffle_v2f16_v8f16__12_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__12_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v6, v2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__12_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v6, v2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -5936,22 +5936,22 @@ define void @v_shuffle_v2f16_v8f16__13_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__13_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v6, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__13_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v6, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -5992,22 +5992,22 @@ define void @v_shuffle_v2f16_v8f16__14_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__14_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v7, v2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__14_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v7, v2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -6040,17 +6040,17 @@ define void @v_shuffle_v2f16_v8f16__u_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6084,18 +6084,18 @@ define void @v_shuffle_v2f16_v8f16__0_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6127,17 +6127,17 @@ define void @v_shuffle_v2f16_v8f16__1_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6171,18 +6171,18 @@ define void @v_shuffle_v2f16_v8f16__2_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6214,17 +6214,17 @@ define void @v_shuffle_v2f16_v8f16__3_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v1, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v1, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6258,18 +6258,18 @@ define void @v_shuffle_v2f16_v8f16__4_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6301,17 +6301,17 @@ define void @v_shuffle_v2f16_v8f16__5_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6345,18 +6345,18 @@ define void @v_shuffle_v2f16_v8f16__6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6388,17 +6388,17 @@ define void @v_shuffle_v2f16_v8f16__7_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6430,17 +6430,17 @@ define void @v_shuffle_v2f16_v8f16__8_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__8_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__8_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6478,21 +6478,21 @@ define void @v_shuffle_v2f16_v8f16__9_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__9_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v4, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__9_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v4, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -6533,22 +6533,22 @@ define void @v_shuffle_v2f16_v8f16__10_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__10_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v5, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__10_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v5, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -6587,21 +6587,21 @@ define void @v_shuffle_v2f16_v8f16__11_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__11_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v5, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__11_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v5, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -6642,22 +6642,22 @@ define void @v_shuffle_v2f16_v8f16__12_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__12_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v6, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__12_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v6, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -6696,21 +6696,21 @@ define void @v_shuffle_v2f16_v8f16__13_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__13_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v6, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__13_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v6, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -6751,22 +6751,22 @@ define void @v_shuffle_v2f16_v8f16__14_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__14_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v7, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__14_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v7, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -6797,16 +6797,16 @@ define void @v_shuffle_v2f16_v8f16__u_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v3, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6840,18 +6840,18 @@ define void @v_shuffle_v2f16_v8f16__0_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6885,18 +6885,18 @@ define void @v_shuffle_v2f16_v8f16__1_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6930,18 +6930,18 @@ define void @v_shuffle_v2f16_v8f16__2_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6975,18 +6975,18 @@ define void @v_shuffle_v2f16_v8f16__3_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7020,18 +7020,18 @@ define void @v_shuffle_v2f16_v8f16__4_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v2, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7065,18 +7065,18 @@ define void @v_shuffle_v2f16_v8f16__5_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7106,16 +7106,16 @@ define void @v_shuffle_v2f16_v8f16__6_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v3, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7149,18 +7149,18 @@ define void @v_shuffle_v2f16_v8f16__7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7190,16 +7190,16 @@ define void @v_shuffle_v2f16_v8f16__8_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__8_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__8_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v3, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7239,22 +7239,22 @@ define void @v_shuffle_v2f16_v8f16__9_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__9_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v4, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__9_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v4, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -7295,22 +7295,22 @@ define void @v_shuffle_v2f16_v8f16__10_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__10_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v5, v3 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__10_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v5, v3 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -7351,22 +7351,22 @@ define void @v_shuffle_v2f16_v8f16__11_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__11_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v5, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__11_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v5, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -7407,22 +7407,22 @@ define void @v_shuffle_v2f16_v8f16__12_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__12_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v6, v3 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__12_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v6, v3 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -7463,22 +7463,22 @@ define void @v_shuffle_v2f16_v8f16__13_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__13_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v6, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__13_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v6, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -7519,22 +7519,22 @@ define void @v_shuffle_v2f16_v8f16__14_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__14_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v7, v3 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__14_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v7, v3 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -7576,16 +7576,16 @@ define void @v_shuffle_v2f16_v8f16__0_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__0_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__0_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7617,17 +7617,17 @@ define void @v_shuffle_v2f16_v8f16__1_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__1_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__1_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7657,16 +7657,16 @@ define void @v_shuffle_v2f16_v8f16__2_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__2_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__2_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7698,17 +7698,17 @@ define void @v_shuffle_v2f16_v8f16__3_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__3_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__3_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7738,16 +7738,16 @@ define void @v_shuffle_v2f16_v8f16__4_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__4_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__4_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7779,17 +7779,17 @@ define void @v_shuffle_v2f16_v8f16__5_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__5_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__5_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7819,16 +7819,16 @@ define void @v_shuffle_v2f16_v8f16__6_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__6_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__6_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v3, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7860,17 +7860,17 @@ define void @v_shuffle_v2f16_v8f16__7_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__7_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__7_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7913,17 +7913,17 @@ define void @v_shuffle_v2f16_v8f16__9_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__9_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__9_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -7958,18 +7958,18 @@ define void @v_shuffle_v2f16_v8f16__10_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__10_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__10_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -8002,17 +8002,17 @@ define void @v_shuffle_v2f16_v8f16__11_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__11_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__11_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v1, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -8047,18 +8047,18 @@ define void @v_shuffle_v2f16_v8f16__12_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__12_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__12_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -8091,17 +8091,17 @@ define void @v_shuffle_v2f16_v8f16__13_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__13_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__13_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -8136,18 +8136,18 @@ define void @v_shuffle_v2f16_v8f16__14_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__14_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__14_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -8178,16 +8178,16 @@ define void @v_shuffle_v2f16_v8f16__u_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__u_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__u_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -8228,22 +8228,22 @@ define void @v_shuffle_v2f16_v8f16__0_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__0_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__0_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -8284,22 +8284,22 @@ define void @v_shuffle_v2f16_v8f16__1_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__1_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__1_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v0, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -8340,22 +8340,22 @@ define void @v_shuffle_v2f16_v8f16__2_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__2_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__2_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -8396,22 +8396,22 @@ define void @v_shuffle_v2f16_v8f16__3_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__3_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v1, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__3_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v1, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -8452,22 +8452,22 @@ define void @v_shuffle_v2f16_v8f16__4_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__4_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v4 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__4_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v2, v4 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -8508,22 +8508,22 @@ define void @v_shuffle_v2f16_v8f16__5_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__5_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v4, v2, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__5_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v4, v2, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -8564,22 +8564,22 @@ define void @v_shuffle_v2f16_v8f16__6_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__6_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v4 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__6_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v4 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -8620,22 +8620,22 @@ define void @v_shuffle_v2f16_v8f16__7_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__7_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v4, v3, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__7_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v4, v3, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -8666,16 +8666,16 @@ define void @v_shuffle_v2f16_v8f16__8_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__8_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__8_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -8710,18 +8710,18 @@ define void @v_shuffle_v2f16_v8f16__9_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__9_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__9_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -8756,18 +8756,18 @@ define void @v_shuffle_v2f16_v8f16__10_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__10_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__10_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v0 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -8802,18 +8802,18 @@ define void @v_shuffle_v2f16_v8f16__11_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__11_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__11_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -8848,18 +8848,18 @@ define void @v_shuffle_v2f16_v8f16__12_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__12_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v0 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__12_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v2, v0 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -8894,18 +8894,18 @@ define void @v_shuffle_v2f16_v8f16__13_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__13_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__13_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -8940,18 +8940,18 @@ define void @v_shuffle_v2f16_v8f16__14_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__14_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__14_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -8984,17 +8984,17 @@ define void @v_shuffle_v2f16_v8f16__u_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__u_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__u_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -9035,22 +9035,22 @@ define void @v_shuffle_v2f16_v8f16__0_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__0_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__0_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -9089,21 +9089,21 @@ define void @v_shuffle_v2f16_v8f16__1_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__1_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__1_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v0, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -9144,22 +9144,22 @@ define void @v_shuffle_v2f16_v8f16__2_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__2_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__2_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -9198,21 +9198,21 @@ define void @v_shuffle_v2f16_v8f16__3_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__3_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v1, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__3_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v1, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -9253,22 +9253,22 @@ define void @v_shuffle_v2f16_v8f16__4_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__4_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v5, v2, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__4_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v5, v2, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -9307,21 +9307,21 @@ define void @v_shuffle_v2f16_v8f16__5_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__5_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v5, v2, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__5_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v5, v2, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -9362,22 +9362,22 @@ define void @v_shuffle_v2f16_v8f16__6_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__6_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v5, v3, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__6_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v5, v3, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -9416,21 +9416,21 @@ define void @v_shuffle_v2f16_v8f16__7_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__7_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v5, v3, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__7_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v5, v3, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -9465,18 +9465,18 @@ define void @v_shuffle_v2f16_v8f16__8_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__8_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__8_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -9509,17 +9509,17 @@ define void @v_shuffle_v2f16_v8f16__9_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__9_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__9_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -9554,18 +9554,18 @@ define void @v_shuffle_v2f16_v8f16__10_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__10_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__10_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -9598,17 +9598,17 @@ define void @v_shuffle_v2f16_v8f16__11_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__11_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__11_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v1, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -9643,18 +9643,18 @@ define void @v_shuffle_v2f16_v8f16__12_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__12_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__12_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -9687,17 +9687,17 @@ define void @v_shuffle_v2f16_v8f16__13_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__13_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__13_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -9732,18 +9732,18 @@ define void @v_shuffle_v2f16_v8f16__14_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__14_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__14_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -9774,16 +9774,16 @@ define void @v_shuffle_v2f16_v8f16__u_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__u_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__u_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -9824,22 +9824,22 @@ define void @v_shuffle_v2f16_v8f16__0_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__0_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v3 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__0_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v3 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -9880,22 +9880,22 @@ define void @v_shuffle_v2f16_v8f16__1_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__1_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__1_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -9936,22 +9936,22 @@ define void @v_shuffle_v2f16_v8f16__2_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__2_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v3 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__2_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v3 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -9992,22 +9992,22 @@ define void @v_shuffle_v2f16_v8f16__3_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__3_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__3_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -10048,22 +10048,22 @@ define void @v_shuffle_v2f16_v8f16__4_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__4_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v5 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__4_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v2, v5 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -10104,22 +10104,22 @@ define void @v_shuffle_v2f16_v8f16__5_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__5_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v5, v2, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__5_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v5, v2, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -10160,22 +10160,22 @@ define void @v_shuffle_v2f16_v8f16__6_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__6_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v5 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__6_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v5 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -10216,22 +10216,22 @@ define void @v_shuffle_v2f16_v8f16__7_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__7_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v5, v3, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__7_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v5, v3, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -10266,18 +10266,18 @@ define void @v_shuffle_v2f16_v8f16__8_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__8_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__8_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v1 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -10312,18 +10312,18 @@ define void @v_shuffle_v2f16_v8f16__9_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__9_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__9_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -10354,16 +10354,16 @@ define void @v_shuffle_v2f16_v8f16__10_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__10_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__10_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -10398,18 +10398,18 @@ define void @v_shuffle_v2f16_v8f16__11_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__11_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__11_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -10444,18 +10444,18 @@ define void @v_shuffle_v2f16_v8f16__12_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__12_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__12_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v2, v1 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -10490,18 +10490,18 @@ define void @v_shuffle_v2f16_v8f16__13_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__13_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__13_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -10536,18 +10536,18 @@ define void @v_shuffle_v2f16_v8f16__14_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__14_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__14_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v1 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -10580,17 +10580,17 @@ define void @v_shuffle_v2f16_v8f16__u_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__u_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__u_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -10631,22 +10631,22 @@ define void @v_shuffle_v2f16_v8f16__0_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__0_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v4, v0, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__0_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v4, v0, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -10685,21 +10685,21 @@ define void @v_shuffle_v2f16_v8f16__1_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__1_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v4, v0, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__1_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v4, v0, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -10740,22 +10740,22 @@ define void @v_shuffle_v2f16_v8f16__2_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__2_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v4, v1, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__2_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v4, v1, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -10794,21 +10794,21 @@ define void @v_shuffle_v2f16_v8f16__3_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__3_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v4, v1, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__3_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v4, v1, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -10849,22 +10849,22 @@ define void @v_shuffle_v2f16_v8f16__4_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__4_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v6, v2, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__4_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v6, v2, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -10903,21 +10903,21 @@ define void @v_shuffle_v2f16_v8f16__5_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__5_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v6, v2, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__5_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v6, v2, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -10958,22 +10958,22 @@ define void @v_shuffle_v2f16_v8f16__6_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__6_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v6, v3, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__6_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v6, v3, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -11012,21 +11012,21 @@ define void @v_shuffle_v2f16_v8f16__7_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__7_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v6, v3, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__7_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v6, v3, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -11061,18 +11061,18 @@ define void @v_shuffle_v2f16_v8f16__8_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__8_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__8_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -11105,17 +11105,17 @@ define void @v_shuffle_v2f16_v8f16__9_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__9_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__9_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -11150,18 +11150,18 @@ define void @v_shuffle_v2f16_v8f16__10_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__10_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__10_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -11194,17 +11194,17 @@ define void @v_shuffle_v2f16_v8f16__11_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__11_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v1, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__11_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v1, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -11239,18 +11239,18 @@ define void @v_shuffle_v2f16_v8f16__12_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__12_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__12_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -11283,17 +11283,17 @@ define void @v_shuffle_v2f16_v8f16__13_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__13_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__13_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -11328,18 +11328,18 @@ define void @v_shuffle_v2f16_v8f16__14_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__14_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__14_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -11370,16 +11370,16 @@ define void @v_shuffle_v2f16_v8f16__u_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__u_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__u_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -11420,22 +11420,22 @@ define void @v_shuffle_v2f16_v8f16__0_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__0_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v4 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__0_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v4 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -11476,22 +11476,22 @@ define void @v_shuffle_v2f16_v8f16__1_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__1_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v4, v0, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__1_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v4, v0, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -11532,22 +11532,22 @@ define void @v_shuffle_v2f16_v8f16__2_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__2_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v4 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__2_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v4 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -11588,22 +11588,22 @@ define void @v_shuffle_v2f16_v8f16__3_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__3_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v4, v1, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__3_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v4, v1, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -11644,22 +11644,22 @@ define void @v_shuffle_v2f16_v8f16__4_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__4_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v6 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__4_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v2, v6 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -11700,22 +11700,22 @@ define void @v_shuffle_v2f16_v8f16__5_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__5_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v6, v2, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__5_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v6, v2, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -11756,22 +11756,22 @@ define void @v_shuffle_v2f16_v8f16__6_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__6_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v6 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__6_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v6 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -11812,22 +11812,22 @@ define void @v_shuffle_v2f16_v8f16__7_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__7_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v6, v3, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__7_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v6, v3, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -11862,18 +11862,18 @@ define void @v_shuffle_v2f16_v8f16__8_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__8_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__8_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -11908,18 +11908,18 @@ define void @v_shuffle_v2f16_v8f16__9_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__9_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__9_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -11954,18 +11954,18 @@ define void @v_shuffle_v2f16_v8f16__10_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__10_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__10_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -12000,18 +12000,18 @@ define void @v_shuffle_v2f16_v8f16__11_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__11_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__11_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -12042,16 +12042,16 @@ define void @v_shuffle_v2f16_v8f16__12_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__12_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__12_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -12086,18 +12086,18 @@ define void @v_shuffle_v2f16_v8f16__13_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__13_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__13_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -12132,18 +12132,18 @@ define void @v_shuffle_v2f16_v8f16__14_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__14_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__14_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -12176,17 +12176,17 @@ define void @v_shuffle_v2f16_v8f16__u_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__u_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__u_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -12227,22 +12227,22 @@ define void @v_shuffle_v2f16_v8f16__0_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__0_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v5, v0, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__0_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v5, v0, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -12281,21 +12281,21 @@ define void @v_shuffle_v2f16_v8f16__1_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__1_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v5, v0, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__1_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v5, v0, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -12336,22 +12336,22 @@ define void @v_shuffle_v2f16_v8f16__2_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__2_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v5, v1, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__2_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v5, v1, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -12390,21 +12390,21 @@ define void @v_shuffle_v2f16_v8f16__3_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__3_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v5, v1, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__3_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v5, v1, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -12445,22 +12445,22 @@ define void @v_shuffle_v2f16_v8f16__4_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__4_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v7, v2, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__4_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v7, v2, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -12499,21 +12499,21 @@ define void @v_shuffle_v2f16_v8f16__5_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__5_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v7, v2, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__5_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v7, v2, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -12554,22 +12554,22 @@ define void @v_shuffle_v2f16_v8f16__6_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__6_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v7, v3, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__6_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v7, v3, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -12608,21 +12608,21 @@ define void @v_shuffle_v2f16_v8f16__7_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__7_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v7, v3, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__7_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v7, v3, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -12657,18 +12657,18 @@ define void @v_shuffle_v2f16_v8f16__8_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__8_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__8_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -12701,17 +12701,17 @@ define void @v_shuffle_v2f16_v8f16__9_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__9_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__9_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -12746,18 +12746,18 @@ define void @v_shuffle_v2f16_v8f16__10_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__10_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__10_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -12790,17 +12790,17 @@ define void @v_shuffle_v2f16_v8f16__11_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__11_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v1, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__11_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v1, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -12835,18 +12835,18 @@ define void @v_shuffle_v2f16_v8f16__12_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__12_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__12_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -12879,17 +12879,17 @@ define void @v_shuffle_v2f16_v8f16__13_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__13_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__13_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -12924,18 +12924,18 @@ define void @v_shuffle_v2f16_v8f16__14_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__14_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__14_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -12966,16 +12966,16 @@ define void @v_shuffle_v2f16_v8f16__u_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__u_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__u_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v3, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -13016,22 +13016,22 @@ define void @v_shuffle_v2f16_v8f16__0_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__0_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v5 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__0_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v5 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -13072,22 +13072,22 @@ define void @v_shuffle_v2f16_v8f16__1_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__1_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v5, v0, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__1_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v5, v0, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -13128,22 +13128,22 @@ define void @v_shuffle_v2f16_v8f16__2_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__2_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v5 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__2_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v5 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -13184,22 +13184,22 @@ define void @v_shuffle_v2f16_v8f16__3_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__3_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v5, v1, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__3_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v5, v1, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -13240,22 +13240,22 @@ define void @v_shuffle_v2f16_v8f16__4_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__4_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v7 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__4_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v2, v7 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -13296,22 +13296,22 @@ define void @v_shuffle_v2f16_v8f16__5_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__5_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v7, v2, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__5_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v7, v2, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -13352,22 +13352,22 @@ define void @v_shuffle_v2f16_v8f16__6_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__6_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v7 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__6_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v7 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -13408,22 +13408,22 @@ define void @v_shuffle_v2f16_v8f16__7_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__7_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v7, v3, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__7_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v7, v3, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -13458,18 +13458,18 @@ define void @v_shuffle_v2f16_v8f16__8_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__8_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__8_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -13504,18 +13504,18 @@ define void @v_shuffle_v2f16_v8f16__9_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__9_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__9_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -13550,18 +13550,18 @@ define void @v_shuffle_v2f16_v8f16__10_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__10_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__10_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -13596,18 +13596,18 @@ define void @v_shuffle_v2f16_v8f16__11_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__11_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__11_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -13642,18 +13642,18 @@ define void @v_shuffle_v2f16_v8f16__12_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__12_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__12_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v2, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -13688,18 +13688,18 @@ define void @v_shuffle_v2f16_v8f16__13_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__13_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__13_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -13730,16 +13730,16 @@ define void @v_shuffle_v2f16_v8f16__14_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f16_v8f16__14_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f16_v8f16__14_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v3, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=v"() %vec1 = call <8 x half> asm "; def $0", "=v"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -13784,17 +13784,17 @@ define void @s_shuffle_v2f16_v8f16__0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -13826,17 +13826,17 @@ define void @s_shuffle_v2f16_v8f16__1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -13868,17 +13868,17 @@ define void @s_shuffle_v2f16_v8f16__2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -13910,17 +13910,17 @@ define void @s_shuffle_v2f16_v8f16__3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -13952,17 +13952,17 @@ define void @s_shuffle_v2f16_v8f16__4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -13994,17 +13994,17 @@ define void @s_shuffle_v2f16_v8f16__5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -14036,17 +14036,17 @@ define void @s_shuffle_v2f16_v8f16__6_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -14078,17 +14078,17 @@ define void @s_shuffle_v2f16_v8f16__7_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -14134,17 +14134,17 @@ define void @s_shuffle_v2f16_v8f16__9_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__9_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__9_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -14177,17 +14177,17 @@ define void @s_shuffle_v2f16_v8f16__10_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__10_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__10_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -14220,17 +14220,17 @@ define void @s_shuffle_v2f16_v8f16__11_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__11_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__11_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -14263,17 +14263,17 @@ define void @s_shuffle_v2f16_v8f16__12_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__12_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__12_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -14306,17 +14306,17 @@ define void @s_shuffle_v2f16_v8f16__13_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__13_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__13_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -14349,17 +14349,17 @@ define void @s_shuffle_v2f16_v8f16__14_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__14_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__14_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -14392,17 +14392,17 @@ define void @s_shuffle_v2f16_v8f16__15_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__15_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__15_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -14443,21 +14443,21 @@ define void @s_shuffle_v2f16_v8f16__15_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__15_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s7, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__15_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s7, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -14500,22 +14500,22 @@ define void @s_shuffle_v2f16_v8f16__15_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__15_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s7, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__15_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s7, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -14556,21 +14556,21 @@ define void @s_shuffle_v2f16_v8f16__15_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__15_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s7, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__15_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s7, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -14613,22 +14613,22 @@ define void @s_shuffle_v2f16_v8f16__15_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__15_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s7, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__15_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s7, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -14669,21 +14669,21 @@ define void @s_shuffle_v2f16_v8f16__15_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__15_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s7, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__15_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s7, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -14726,22 +14726,22 @@ define void @s_shuffle_v2f16_v8f16__15_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__15_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_lshr_b32 s1, s7, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__15_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_lshr_b32 s1, s7, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -14782,21 +14782,21 @@ define void @s_shuffle_v2f16_v8f16__15_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__15_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s7, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__15_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s7, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -14839,22 +14839,22 @@ define void @s_shuffle_v2f16_v8f16__15_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__15_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_lshr_b32 s1, s7, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__15_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_lshr_b32 s1, s7, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -14889,18 +14889,18 @@ define void @s_shuffle_v2f16_v8f16__15_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__15_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__15_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -14937,19 +14937,19 @@ define void @s_shuffle_v2f16_v8f16__15_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__15_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__15_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -14984,18 +14984,18 @@ define void @s_shuffle_v2f16_v8f16__15_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__15_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__15_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -15032,19 +15032,19 @@ define void @s_shuffle_v2f16_v8f16__15_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__15_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__15_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -15079,18 +15079,18 @@ define void @s_shuffle_v2f16_v8f16__15_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__15_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__15_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -15127,19 +15127,19 @@ define void @s_shuffle_v2f16_v8f16__15_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__15_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__15_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -15174,18 +15174,18 @@ define void @s_shuffle_v2f16_v8f16__15_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__15_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__15_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -15220,18 +15220,18 @@ define void @s_shuffle_v2f16_v8f16__15_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__15_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__15_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -15264,17 +15264,17 @@ define void @s_shuffle_v2f16_v8f16__u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -15306,17 +15306,17 @@ define void @s_shuffle_v2f16_v8f16__0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> zeroinitializer call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -15350,18 +15350,18 @@ define void @s_shuffle_v2f16_v8f16__1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -15393,17 +15393,17 @@ define void @s_shuffle_v2f16_v8f16__2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -15437,18 +15437,18 @@ define void @s_shuffle_v2f16_v8f16__3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -15480,17 +15480,17 @@ define void @s_shuffle_v2f16_v8f16__4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -15524,18 +15524,18 @@ define void @s_shuffle_v2f16_v8f16__5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -15567,17 +15567,17 @@ define void @s_shuffle_v2f16_v8f16__6_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -15611,18 +15611,18 @@ define void @s_shuffle_v2f16_v8f16__7_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -15654,17 +15654,17 @@ define void @s_shuffle_v2f16_v8f16__8_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__8_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__8_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -15704,21 +15704,21 @@ define void @s_shuffle_v2f16_v8f16__9_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__9_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s4, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__9_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s4, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -15757,20 +15757,20 @@ define void @s_shuffle_v2f16_v8f16__10_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__10_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s5, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__10_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s5, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -15811,21 +15811,21 @@ define void @s_shuffle_v2f16_v8f16__11_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__11_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s5, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__11_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s5, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -15864,20 +15864,20 @@ define void @s_shuffle_v2f16_v8f16__12_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__12_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s6, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__12_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s6, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -15918,21 +15918,21 @@ define void @s_shuffle_v2f16_v8f16__13_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__13_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s6, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__13_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s6, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -15971,20 +15971,20 @@ define void @s_shuffle_v2f16_v8f16__14_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__14_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s7, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__14_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s7, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -16015,17 +16015,17 @@ define void @s_shuffle_v2f16_v8f16__u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -16055,17 +16055,17 @@ define void @s_shuffle_v2f16_v8f16__0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -16099,18 +16099,18 @@ define void @s_shuffle_v2f16_v8f16__1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -16144,18 +16144,18 @@ define void @s_shuffle_v2f16_v8f16__2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -16191,19 +16191,19 @@ define void @s_shuffle_v2f16_v8f16__3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -16237,18 +16237,18 @@ define void @s_shuffle_v2f16_v8f16__4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -16284,19 +16284,19 @@ define void @s_shuffle_v2f16_v8f16__5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -16330,18 +16330,18 @@ define void @s_shuffle_v2f16_v8f16__6_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -16377,19 +16377,19 @@ define void @s_shuffle_v2f16_v8f16__7_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -16419,17 +16419,17 @@ define void @s_shuffle_v2f16_v8f16__8_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__8_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__8_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -16471,22 +16471,22 @@ define void @s_shuffle_v2f16_v8f16__9_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__9_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s4, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__9_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s4, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -16527,21 +16527,21 @@ define void @s_shuffle_v2f16_v8f16__10_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__10_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s5, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__10_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s5, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -16584,22 +16584,22 @@ define void @s_shuffle_v2f16_v8f16__11_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__11_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s5, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__11_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s5, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -16640,21 +16640,21 @@ define void @s_shuffle_v2f16_v8f16__12_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__12_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s6, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__12_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s6, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -16697,22 +16697,22 @@ define void @s_shuffle_v2f16_v8f16__13_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__13_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s6, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__13_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s6, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -16753,21 +16753,21 @@ define void @s_shuffle_v2f16_v8f16__14_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__14_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s7, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__14_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s7, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -16800,17 +16800,17 @@ define void @s_shuffle_v2f16_v8f16__u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -16842,17 +16842,17 @@ define void @s_shuffle_v2f16_v8f16__0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -16886,18 +16886,18 @@ define void @s_shuffle_v2f16_v8f16__1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -16929,17 +16929,17 @@ define void @s_shuffle_v2f16_v8f16__2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -16973,18 +16973,18 @@ define void @s_shuffle_v2f16_v8f16__3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -17016,17 +17016,17 @@ define void @s_shuffle_v2f16_v8f16__4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -17060,18 +17060,18 @@ define void @s_shuffle_v2f16_v8f16__5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -17103,17 +17103,17 @@ define void @s_shuffle_v2f16_v8f16__6_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -17147,18 +17147,18 @@ define void @s_shuffle_v2f16_v8f16__7_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -17190,17 +17190,17 @@ define void @s_shuffle_v2f16_v8f16__8_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__8_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__8_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -17240,21 +17240,21 @@ define void @s_shuffle_v2f16_v8f16__9_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__9_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s4, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__9_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s4, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -17293,20 +17293,20 @@ define void @s_shuffle_v2f16_v8f16__10_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__10_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s5, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__10_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s5, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -17347,21 +17347,21 @@ define void @s_shuffle_v2f16_v8f16__11_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__11_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s5, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__11_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s5, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -17400,20 +17400,20 @@ define void @s_shuffle_v2f16_v8f16__12_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__12_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s6, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__12_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s6, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -17454,21 +17454,21 @@ define void @s_shuffle_v2f16_v8f16__13_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__13_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s6, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__13_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s6, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -17507,20 +17507,20 @@ define void @s_shuffle_v2f16_v8f16__14_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__14_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s7, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__14_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s7, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -17553,17 +17553,17 @@ define void @s_shuffle_v2f16_v8f16__u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -17597,18 +17597,18 @@ define void @s_shuffle_v2f16_v8f16__0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -17644,19 +17644,19 @@ define void @s_shuffle_v2f16_v8f16__1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -17688,17 +17688,17 @@ define void @s_shuffle_v2f16_v8f16__2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -17732,18 +17732,18 @@ define void @s_shuffle_v2f16_v8f16__3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -17777,18 +17777,18 @@ define void @s_shuffle_v2f16_v8f16__4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -17824,19 +17824,19 @@ define void @s_shuffle_v2f16_v8f16__5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -17870,18 +17870,18 @@ define void @s_shuffle_v2f16_v8f16__6_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -17917,19 +17917,19 @@ define void @s_shuffle_v2f16_v8f16__7_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -17961,17 +17961,17 @@ define void @s_shuffle_v2f16_v8f16__8_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__8_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__8_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -18013,22 +18013,22 @@ define void @s_shuffle_v2f16_v8f16__9_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__9_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s4, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__9_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s4, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -18069,21 +18069,21 @@ define void @s_shuffle_v2f16_v8f16__10_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__10_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s5, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__10_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s5, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -18126,22 +18126,22 @@ define void @s_shuffle_v2f16_v8f16__11_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__11_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s5, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__11_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s5, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -18182,21 +18182,21 @@ define void @s_shuffle_v2f16_v8f16__12_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__12_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s6, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__12_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s6, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -18239,22 +18239,22 @@ define void @s_shuffle_v2f16_v8f16__13_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__13_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s6, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__13_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s6, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -18295,21 +18295,21 @@ define void @s_shuffle_v2f16_v8f16__14_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__14_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s7, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__14_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s7, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -18342,17 +18342,17 @@ define void @s_shuffle_v2f16_v8f16__u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -18384,17 +18384,17 @@ define void @s_shuffle_v2f16_v8f16__0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -18428,18 +18428,18 @@ define void @s_shuffle_v2f16_v8f16__1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -18471,17 +18471,17 @@ define void @s_shuffle_v2f16_v8f16__2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -18515,18 +18515,18 @@ define void @s_shuffle_v2f16_v8f16__3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -18558,17 +18558,17 @@ define void @s_shuffle_v2f16_v8f16__4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -18602,18 +18602,18 @@ define void @s_shuffle_v2f16_v8f16__5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -18645,17 +18645,17 @@ define void @s_shuffle_v2f16_v8f16__6_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -18689,18 +18689,18 @@ define void @s_shuffle_v2f16_v8f16__7_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -18732,17 +18732,17 @@ define void @s_shuffle_v2f16_v8f16__8_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__8_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__8_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -18782,21 +18782,21 @@ define void @s_shuffle_v2f16_v8f16__9_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__9_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s4, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__9_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s4, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -18835,20 +18835,20 @@ define void @s_shuffle_v2f16_v8f16__10_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__10_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s5, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__10_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s5, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -18889,21 +18889,21 @@ define void @s_shuffle_v2f16_v8f16__11_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__11_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s5, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__11_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s5, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -18942,20 +18942,20 @@ define void @s_shuffle_v2f16_v8f16__12_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__12_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s6, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__12_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s6, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -18996,21 +18996,21 @@ define void @s_shuffle_v2f16_v8f16__13_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__13_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s6, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__13_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s6, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -19049,20 +19049,20 @@ define void @s_shuffle_v2f16_v8f16__14_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__14_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s7, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__14_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s7, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -19095,17 +19095,17 @@ define void @s_shuffle_v2f16_v8f16__u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -19139,18 +19139,18 @@ define void @s_shuffle_v2f16_v8f16__0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -19186,19 +19186,19 @@ define void @s_shuffle_v2f16_v8f16__1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -19232,18 +19232,18 @@ define void @s_shuffle_v2f16_v8f16__2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -19279,19 +19279,19 @@ define void @s_shuffle_v2f16_v8f16__3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -19323,17 +19323,17 @@ define void @s_shuffle_v2f16_v8f16__4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -19367,18 +19367,18 @@ define void @s_shuffle_v2f16_v8f16__5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -19412,18 +19412,18 @@ define void @s_shuffle_v2f16_v8f16__6_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -19459,19 +19459,19 @@ define void @s_shuffle_v2f16_v8f16__7_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -19503,17 +19503,17 @@ define void @s_shuffle_v2f16_v8f16__8_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__8_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__8_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -19555,22 +19555,22 @@ define void @s_shuffle_v2f16_v8f16__9_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__9_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_lshr_b32 s1, s4, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__9_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_lshr_b32 s1, s4, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -19611,21 +19611,21 @@ define void @s_shuffle_v2f16_v8f16__10_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__10_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s5, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__10_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s5, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -19668,22 +19668,22 @@ define void @s_shuffle_v2f16_v8f16__11_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__11_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_lshr_b32 s1, s5, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__11_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_lshr_b32 s1, s5, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -19724,21 +19724,21 @@ define void @s_shuffle_v2f16_v8f16__12_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__12_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s6, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__12_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s6, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -19781,22 +19781,22 @@ define void @s_shuffle_v2f16_v8f16__13_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__13_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_lshr_b32 s1, s6, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__13_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_lshr_b32 s1, s6, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -19837,21 +19837,21 @@ define void @s_shuffle_v2f16_v8f16__14_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__14_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s7, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__14_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s7, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -19884,17 +19884,17 @@ define void @s_shuffle_v2f16_v8f16__u_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -19926,17 +19926,17 @@ define void @s_shuffle_v2f16_v8f16__0_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -19970,18 +19970,18 @@ define void @s_shuffle_v2f16_v8f16__1_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -20013,17 +20013,17 @@ define void @s_shuffle_v2f16_v8f16__2_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -20057,18 +20057,18 @@ define void @s_shuffle_v2f16_v8f16__3_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -20100,17 +20100,17 @@ define void @s_shuffle_v2f16_v8f16__4_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -20144,18 +20144,18 @@ define void @s_shuffle_v2f16_v8f16__5_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -20187,17 +20187,17 @@ define void @s_shuffle_v2f16_v8f16__6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -20231,18 +20231,18 @@ define void @s_shuffle_v2f16_v8f16__7_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -20274,17 +20274,17 @@ define void @s_shuffle_v2f16_v8f16__8_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__8_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__8_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -20324,21 +20324,21 @@ define void @s_shuffle_v2f16_v8f16__9_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__9_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s4, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__9_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s4, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -20377,20 +20377,20 @@ define void @s_shuffle_v2f16_v8f16__10_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__10_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s5, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__10_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s5, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -20431,21 +20431,21 @@ define void @s_shuffle_v2f16_v8f16__11_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__11_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s5, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__11_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s5, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -20484,20 +20484,20 @@ define void @s_shuffle_v2f16_v8f16__12_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__12_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s6, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__12_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s6, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -20538,21 +20538,21 @@ define void @s_shuffle_v2f16_v8f16__13_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__13_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s6, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__13_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s6, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -20591,20 +20591,20 @@ define void @s_shuffle_v2f16_v8f16__14_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__14_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s7, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__14_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s7, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -20637,17 +20637,17 @@ define void @s_shuffle_v2f16_v8f16__u_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -20681,18 +20681,18 @@ define void @s_shuffle_v2f16_v8f16__0_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -20728,19 +20728,19 @@ define void @s_shuffle_v2f16_v8f16__1_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -20774,18 +20774,18 @@ define void @s_shuffle_v2f16_v8f16__2_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -20821,19 +20821,19 @@ define void @s_shuffle_v2f16_v8f16__3_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -20867,18 +20867,18 @@ define void @s_shuffle_v2f16_v8f16__4_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -20914,19 +20914,19 @@ define void @s_shuffle_v2f16_v8f16__5_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -20958,17 +20958,17 @@ define void @s_shuffle_v2f16_v8f16__6_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -21002,18 +21002,18 @@ define void @s_shuffle_v2f16_v8f16__7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -21045,17 +21045,17 @@ define void @s_shuffle_v2f16_v8f16__8_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__8_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__8_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -21097,22 +21097,22 @@ define void @s_shuffle_v2f16_v8f16__9_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__9_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_lshr_b32 s1, s4, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__9_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_lshr_b32 s1, s4, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -21153,21 +21153,21 @@ define void @s_shuffle_v2f16_v8f16__10_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__10_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s5, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__10_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s5, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -21210,22 +21210,22 @@ define void @s_shuffle_v2f16_v8f16__11_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__11_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_lshr_b32 s1, s5, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__11_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_lshr_b32 s1, s5, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -21266,21 +21266,21 @@ define void @s_shuffle_v2f16_v8f16__12_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__12_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s6, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__12_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s6, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -21323,22 +21323,22 @@ define void @s_shuffle_v2f16_v8f16__13_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__13_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_lshr_b32 s1, s6, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__13_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_lshr_b32 s1, s6, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -21379,21 +21379,21 @@ define void @s_shuffle_v2f16_v8f16__14_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__14_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s7, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__14_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s7, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -21438,17 +21438,17 @@ define void @s_shuffle_v2f16_v8f16__0_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__0_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__0_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -21480,17 +21480,17 @@ define void @s_shuffle_v2f16_v8f16__1_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__1_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__1_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -21522,17 +21522,17 @@ define void @s_shuffle_v2f16_v8f16__2_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__2_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__2_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -21564,17 +21564,17 @@ define void @s_shuffle_v2f16_v8f16__3_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__3_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__3_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -21606,17 +21606,17 @@ define void @s_shuffle_v2f16_v8f16__4_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__4_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__4_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -21648,17 +21648,17 @@ define void @s_shuffle_v2f16_v8f16__5_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__5_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__5_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -21690,17 +21690,17 @@ define void @s_shuffle_v2f16_v8f16__6_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__6_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__6_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -21732,17 +21732,17 @@ define void @s_shuffle_v2f16_v8f16__7_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__7_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__7_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) @@ -21790,18 +21790,18 @@ define void @s_shuffle_v2f16_v8f16__9_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__9_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__9_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -21834,17 +21834,17 @@ define void @s_shuffle_v2f16_v8f16__10_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__10_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__10_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -21879,18 +21879,18 @@ define void @s_shuffle_v2f16_v8f16__11_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__11_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__11_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -21923,17 +21923,17 @@ define void @s_shuffle_v2f16_v8f16__12_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__12_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__12_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -21968,18 +21968,18 @@ define void @s_shuffle_v2f16_v8f16__13_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__13_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__13_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -22012,17 +22012,17 @@ define void @s_shuffle_v2f16_v8f16__14_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__14_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__14_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -22053,17 +22053,17 @@ define void @s_shuffle_v2f16_v8f16__u_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__u_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__u_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -22104,21 +22104,21 @@ define void @s_shuffle_v2f16_v8f16__0_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__0_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s4, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__0_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s4, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -22161,22 +22161,22 @@ define void @s_shuffle_v2f16_v8f16__1_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__1_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s4, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__1_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s4, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -22217,21 +22217,21 @@ define void @s_shuffle_v2f16_v8f16__2_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__2_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s4, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__2_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s4, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -22274,22 +22274,22 @@ define void @s_shuffle_v2f16_v8f16__3_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__3_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s4, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__3_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s4, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -22330,21 +22330,21 @@ define void @s_shuffle_v2f16_v8f16__4_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__4_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s4, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__4_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s4, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -22387,22 +22387,22 @@ define void @s_shuffle_v2f16_v8f16__5_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__5_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s4, 16 -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__5_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s4, 16 +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -22443,21 +22443,21 @@ define void @s_shuffle_v2f16_v8f16__6_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__6_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s4, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__6_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s4, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -22500,22 +22500,22 @@ define void @s_shuffle_v2f16_v8f16__7_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__7_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s4, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__7_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s4, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -22546,17 +22546,17 @@ define void @s_shuffle_v2f16_v8f16__8_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__8_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__8_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -22591,18 +22591,18 @@ define void @s_shuffle_v2f16_v8f16__9_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__9_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__9_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -22637,18 +22637,18 @@ define void @s_shuffle_v2f16_v8f16__10_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__10_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__10_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -22685,19 +22685,19 @@ define void @s_shuffle_v2f16_v8f16__11_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__11_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__11_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -22732,18 +22732,18 @@ define void @s_shuffle_v2f16_v8f16__12_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__12_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__12_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -22780,19 +22780,19 @@ define void @s_shuffle_v2f16_v8f16__13_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__13_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__13_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -22827,18 +22827,18 @@ define void @s_shuffle_v2f16_v8f16__14_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__14_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__14_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -22871,17 +22871,17 @@ define void @s_shuffle_v2f16_v8f16__u_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__u_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__u_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -22920,20 +22920,20 @@ define void @s_shuffle_v2f16_v8f16__0_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__0_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__0_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -22974,21 +22974,21 @@ define void @s_shuffle_v2f16_v8f16__1_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__1_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__1_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -23027,20 +23027,20 @@ define void @s_shuffle_v2f16_v8f16__2_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__2_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__2_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -23081,21 +23081,21 @@ define void @s_shuffle_v2f16_v8f16__3_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__3_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__3_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -23134,20 +23134,20 @@ define void @s_shuffle_v2f16_v8f16__4_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__4_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__4_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -23188,21 +23188,21 @@ define void @s_shuffle_v2f16_v8f16__5_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__5_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__5_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -23241,20 +23241,20 @@ define void @s_shuffle_v2f16_v8f16__6_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__6_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__6_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -23295,21 +23295,21 @@ define void @s_shuffle_v2f16_v8f16__7_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__7_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__7_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -23342,17 +23342,17 @@ define void @s_shuffle_v2f16_v8f16__8_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__8_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__8_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -23387,18 +23387,18 @@ define void @s_shuffle_v2f16_v8f16__9_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__9_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__9_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -23431,17 +23431,17 @@ define void @s_shuffle_v2f16_v8f16__10_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__10_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__10_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -23476,18 +23476,18 @@ define void @s_shuffle_v2f16_v8f16__11_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__11_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__11_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -23520,17 +23520,17 @@ define void @s_shuffle_v2f16_v8f16__12_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__12_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__12_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -23565,18 +23565,18 @@ define void @s_shuffle_v2f16_v8f16__13_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__13_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__13_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -23609,17 +23609,17 @@ define void @s_shuffle_v2f16_v8f16__14_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__14_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__14_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -23652,17 +23652,17 @@ define void @s_shuffle_v2f16_v8f16__u_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__u_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__u_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -23703,21 +23703,21 @@ define void @s_shuffle_v2f16_v8f16__0_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__0_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s5, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__0_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s5, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -23760,22 +23760,22 @@ define void @s_shuffle_v2f16_v8f16__1_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__1_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s5, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__1_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s5, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -23816,21 +23816,21 @@ define void @s_shuffle_v2f16_v8f16__2_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__2_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s5, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__2_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s5, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -23873,22 +23873,22 @@ define void @s_shuffle_v2f16_v8f16__3_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__3_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s5, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__3_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s5, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -23929,21 +23929,21 @@ define void @s_shuffle_v2f16_v8f16__4_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__4_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s5, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__4_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s5, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -23986,22 +23986,22 @@ define void @s_shuffle_v2f16_v8f16__5_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__5_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s5, 16 -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__5_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s5, 16 +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -24042,21 +24042,21 @@ define void @s_shuffle_v2f16_v8f16__6_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__6_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s5, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__6_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s5, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -24099,22 +24099,22 @@ define void @s_shuffle_v2f16_v8f16__7_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__7_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s5, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__7_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s5, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -24149,18 +24149,18 @@ define void @s_shuffle_v2f16_v8f16__8_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__8_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__8_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -24197,19 +24197,19 @@ define void @s_shuffle_v2f16_v8f16__9_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__9_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__9_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -24242,17 +24242,17 @@ define void @s_shuffle_v2f16_v8f16__10_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__10_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__10_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -24287,18 +24287,18 @@ define void @s_shuffle_v2f16_v8f16__11_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__11_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__11_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -24333,18 +24333,18 @@ define void @s_shuffle_v2f16_v8f16__12_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__12_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__12_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -24381,19 +24381,19 @@ define void @s_shuffle_v2f16_v8f16__13_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__13_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__13_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -24428,18 +24428,18 @@ define void @s_shuffle_v2f16_v8f16__14_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__14_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__14_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -24472,17 +24472,17 @@ define void @s_shuffle_v2f16_v8f16__u_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__u_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__u_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -24521,20 +24521,20 @@ define void @s_shuffle_v2f16_v8f16__0_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__0_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__0_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -24575,21 +24575,21 @@ define void @s_shuffle_v2f16_v8f16__1_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__1_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__1_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -24628,20 +24628,20 @@ define void @s_shuffle_v2f16_v8f16__2_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__2_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__2_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -24682,21 +24682,21 @@ define void @s_shuffle_v2f16_v8f16__3_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__3_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__3_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -24735,20 +24735,20 @@ define void @s_shuffle_v2f16_v8f16__4_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__4_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__4_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -24789,21 +24789,21 @@ define void @s_shuffle_v2f16_v8f16__5_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__5_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__5_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -24842,20 +24842,20 @@ define void @s_shuffle_v2f16_v8f16__6_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__6_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__6_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -24896,21 +24896,21 @@ define void @s_shuffle_v2f16_v8f16__7_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__7_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__7_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -24943,17 +24943,17 @@ define void @s_shuffle_v2f16_v8f16__8_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__8_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__8_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -24988,18 +24988,18 @@ define void @s_shuffle_v2f16_v8f16__9_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__9_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__9_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -25032,17 +25032,17 @@ define void @s_shuffle_v2f16_v8f16__10_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__10_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__10_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -25077,18 +25077,18 @@ define void @s_shuffle_v2f16_v8f16__11_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__11_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__11_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -25121,17 +25121,17 @@ define void @s_shuffle_v2f16_v8f16__12_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__12_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__12_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -25166,18 +25166,18 @@ define void @s_shuffle_v2f16_v8f16__13_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__13_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__13_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -25210,17 +25210,17 @@ define void @s_shuffle_v2f16_v8f16__14_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__14_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__14_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -25253,17 +25253,17 @@ define void @s_shuffle_v2f16_v8f16__u_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__u_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__u_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -25304,21 +25304,21 @@ define void @s_shuffle_v2f16_v8f16__0_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__0_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s6, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__0_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s6, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -25361,22 +25361,22 @@ define void @s_shuffle_v2f16_v8f16__1_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__1_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s6, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__1_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s6, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -25417,21 +25417,21 @@ define void @s_shuffle_v2f16_v8f16__2_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__2_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s6, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__2_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s6, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -25474,22 +25474,22 @@ define void @s_shuffle_v2f16_v8f16__3_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__3_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s6, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__3_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s6, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -25530,21 +25530,21 @@ define void @s_shuffle_v2f16_v8f16__4_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__4_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s6, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__4_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s6, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -25587,22 +25587,22 @@ define void @s_shuffle_v2f16_v8f16__5_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__5_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s6, 16 -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__5_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s6, 16 +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -25643,21 +25643,21 @@ define void @s_shuffle_v2f16_v8f16__6_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__6_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s6, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__6_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s6, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -25700,22 +25700,22 @@ define void @s_shuffle_v2f16_v8f16__7_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__7_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s6, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__7_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s6, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -25750,18 +25750,18 @@ define void @s_shuffle_v2f16_v8f16__8_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__8_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__8_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -25798,19 +25798,19 @@ define void @s_shuffle_v2f16_v8f16__9_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__9_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__9_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -25845,18 +25845,18 @@ define void @s_shuffle_v2f16_v8f16__10_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__10_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__10_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -25893,19 +25893,19 @@ define void @s_shuffle_v2f16_v8f16__11_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__11_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__11_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -25938,17 +25938,17 @@ define void @s_shuffle_v2f16_v8f16__12_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__12_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__12_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -25983,18 +25983,18 @@ define void @s_shuffle_v2f16_v8f16__13_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__13_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__13_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -26029,18 +26029,18 @@ define void @s_shuffle_v2f16_v8f16__14_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__14_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__14_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -26073,17 +26073,17 @@ define void @s_shuffle_v2f16_v8f16__u_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__u_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__u_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -26122,20 +26122,20 @@ define void @s_shuffle_v2f16_v8f16__0_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__0_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__0_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -26176,21 +26176,21 @@ define void @s_shuffle_v2f16_v8f16__1_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__1_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__1_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -26229,20 +26229,20 @@ define void @s_shuffle_v2f16_v8f16__2_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__2_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__2_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -26283,21 +26283,21 @@ define void @s_shuffle_v2f16_v8f16__3_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__3_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__3_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -26336,20 +26336,20 @@ define void @s_shuffle_v2f16_v8f16__4_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__4_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__4_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -26390,21 +26390,21 @@ define void @s_shuffle_v2f16_v8f16__5_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__5_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__5_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -26443,20 +26443,20 @@ define void @s_shuffle_v2f16_v8f16__6_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__6_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__6_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -26497,21 +26497,21 @@ define void @s_shuffle_v2f16_v8f16__7_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__7_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__7_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -26544,17 +26544,17 @@ define void @s_shuffle_v2f16_v8f16__8_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__8_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__8_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -26589,18 +26589,18 @@ define void @s_shuffle_v2f16_v8f16__9_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__9_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__9_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -26633,17 +26633,17 @@ define void @s_shuffle_v2f16_v8f16__10_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__10_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__10_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -26678,18 +26678,18 @@ define void @s_shuffle_v2f16_v8f16__11_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__11_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__11_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -26722,17 +26722,17 @@ define void @s_shuffle_v2f16_v8f16__12_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__12_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__12_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -26767,18 +26767,18 @@ define void @s_shuffle_v2f16_v8f16__13_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__13_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__13_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -26811,17 +26811,17 @@ define void @s_shuffle_v2f16_v8f16__14_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__14_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__14_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -26854,17 +26854,17 @@ define void @s_shuffle_v2f16_v8f16__u_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__u_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__u_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -26905,21 +26905,21 @@ define void @s_shuffle_v2f16_v8f16__0_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__0_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s7, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__0_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s7, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -26962,22 +26962,22 @@ define void @s_shuffle_v2f16_v8f16__1_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__1_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s7, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__1_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s7, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -27018,21 +27018,21 @@ define void @s_shuffle_v2f16_v8f16__2_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__2_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s7, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__2_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s7, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -27075,22 +27075,22 @@ define void @s_shuffle_v2f16_v8f16__3_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__3_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s7, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__3_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s7, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -27131,21 +27131,21 @@ define void @s_shuffle_v2f16_v8f16__4_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__4_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s7, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__4_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s7, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -27188,22 +27188,22 @@ define void @s_shuffle_v2f16_v8f16__5_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__5_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s7, 16 -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__5_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s7, 16 +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -27244,21 +27244,21 @@ define void @s_shuffle_v2f16_v8f16__6_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__6_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s7, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__6_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s7, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -27301,22 +27301,22 @@ define void @s_shuffle_v2f16_v8f16__7_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__7_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s7, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__7_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s7, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -27351,18 +27351,18 @@ define void @s_shuffle_v2f16_v8f16__8_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__8_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__8_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -27399,19 +27399,19 @@ define void @s_shuffle_v2f16_v8f16__9_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__9_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__9_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -27446,18 +27446,18 @@ define void @s_shuffle_v2f16_v8f16__10_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__10_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__10_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -27494,19 +27494,19 @@ define void @s_shuffle_v2f16_v8f16__11_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__11_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__11_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -27541,18 +27541,18 @@ define void @s_shuffle_v2f16_v8f16__12_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__12_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__12_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -27589,19 +27589,19 @@ define void @s_shuffle_v2f16_v8f16__13_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__13_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__13_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> @@ -27634,17 +27634,17 @@ define void @s_shuffle_v2f16_v8f16__14_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f16_v8f16__14_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f16_v8f16__14_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x half> asm "; def $0", "=s"() %vec1 = call <8 x half> asm "; def $0", "=s"() %shuf = shufflevector <8 x half> %vec0, <8 x half> %vec1, <2 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v2f32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v2f32.ll index 2f6ddc63cb3e4..9b3dc7f531021 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v2f32.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v2f32.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v2f32_v2f32__u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v2f32_v2f32__0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v2f32__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v2f32__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -79,17 +79,17 @@ define void @v_shuffle_v2f32_v2f32__1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v2f32__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v2f32__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -132,17 +132,17 @@ define void @v_shuffle_v2f32_v2f32__3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v2f32__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v2f32__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <2 x i32> @@ -182,21 +182,21 @@ define void @v_shuffle_v2f32_v2f32__3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v2f32__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v2f32__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <2 x i32> @@ -235,21 +235,21 @@ define void @v_shuffle_v2f32_v2f32__3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v2f32__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v2f32__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <2 x i32> @@ -282,17 +282,17 @@ define void @v_shuffle_v2f32_v2f32__3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v2f32__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v2f32__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <2 x i32> @@ -325,17 +325,17 @@ define void @v_shuffle_v2f32_v2f32__3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v2f32__3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v2f32__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <2 x i32> @@ -368,17 +368,17 @@ define void @v_shuffle_v2f32_v2f32__u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v2f32__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v2f32__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -410,17 +410,17 @@ define void @v_shuffle_v2f32_v2f32__0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v2f32__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v2f32__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <2 x i32> zeroinitializer store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -452,17 +452,17 @@ define void @v_shuffle_v2f32_v2f32__1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v2f32__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v2f32__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -494,17 +494,17 @@ define void @v_shuffle_v2f32_v2f32__2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v2f32__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v2f32__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -534,16 +534,16 @@ define void @v_shuffle_v2f32_v2f32__u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v2f32__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v2f32__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -573,16 +573,16 @@ define void @v_shuffle_v2f32_v2f32__0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v2f32__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v2f32__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -614,17 +614,17 @@ define void @v_shuffle_v2f32_v2f32__1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v2f32__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v2f32__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -654,16 +654,16 @@ define void @v_shuffle_v2f32_v2f32__2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v2f32__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v2f32__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -704,16 +704,16 @@ define void @v_shuffle_v2f32_v2f32__0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v2f32__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v2f32__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -745,17 +745,17 @@ define void @v_shuffle_v2f32_v2f32__1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v2f32__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v2f32__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -796,16 +796,16 @@ define void @v_shuffle_v2f32_v2f32__u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v2f32__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v2f32__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <2 x i32> @@ -844,21 +844,21 @@ define void @v_shuffle_v2f32_v2f32__0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v2f32__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v2f32__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <2 x i32> @@ -897,21 +897,21 @@ define void @v_shuffle_v2f32_v2f32__1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v2f32__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v2f32__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <2 x i32> @@ -942,16 +942,16 @@ define void @v_shuffle_v2f32_v2f32__2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v2f32__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v2f32__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <2 x i32> @@ -996,17 +996,17 @@ define void @s_shuffle_v2f32_v2f32__0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v2f32__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v2f32__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -1038,17 +1038,17 @@ define void @s_shuffle_v2f32_v2f32__1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v2f32__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v2f32__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -1094,17 +1094,17 @@ define void @s_shuffle_v2f32_v2f32__3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v2f32__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v2f32__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <2 x i32> @@ -1145,21 +1145,21 @@ define void @s_shuffle_v2f32_v2f32__3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v2f32__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v2f32__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <2 x i32> @@ -1198,20 +1198,20 @@ define void @s_shuffle_v2f32_v2f32__3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v2f32__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v2f32__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <2 x i32> @@ -1246,18 +1246,18 @@ define void @s_shuffle_v2f32_v2f32__3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v2f32__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v2f32__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <2 x i32> @@ -1309,17 +1309,17 @@ define void @s_shuffle_v2f32_v2f32__u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v2f32__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v2f32__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -1371,18 +1371,18 @@ define void @s_shuffle_v2f32_v2f32__1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v2f32__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v2f32__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -1414,17 +1414,17 @@ define void @s_shuffle_v2f32_v2f32__2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v2f32__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v2f32__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -1454,17 +1454,17 @@ define void @s_shuffle_v2f32_v2f32__u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v2f32__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v2f32__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -1494,17 +1494,17 @@ define void @s_shuffle_v2f32_v2f32__0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v2f32__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v2f32__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -1552,17 +1552,17 @@ define void @s_shuffle_v2f32_v2f32__2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v2f32__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v2f32__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -1606,17 +1606,17 @@ define void @s_shuffle_v2f32_v2f32__0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v2f32__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v2f32__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -1648,17 +1648,17 @@ define void @s_shuffle_v2f32_v2f32__1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v2f32__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v2f32__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -1702,17 +1702,17 @@ define void @s_shuffle_v2f32_v2f32__u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v2f32__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v2f32__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <2 x i32> @@ -1751,20 +1751,20 @@ define void @s_shuffle_v2f32_v2f32__0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v2f32__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v2f32__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <2 x i32> @@ -1803,20 +1803,20 @@ define void @s_shuffle_v2f32_v2f32__1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v2f32__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v2f32__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <2 x i32> @@ -1847,17 +1847,17 @@ define void @s_shuffle_v2f32_v2f32__2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v2f32__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v2f32__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <2 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v3f32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v3f32.ll index 3d42e66eb865c..34043cd067b25 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v3f32.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v3f32.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v2f32_v3f32__u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v2f32_v3f32__0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -78,17 +78,17 @@ define void @v_shuffle_v2f32_v3f32__1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -120,17 +120,17 @@ define void @v_shuffle_v2f32_v3f32__2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -172,17 +172,17 @@ define void @v_shuffle_v2f32_v3f32__4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -215,17 +215,17 @@ define void @v_shuffle_v2f32_v3f32__5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -266,22 +266,22 @@ define void @v_shuffle_v2f32_v3f32__5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v5, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v5, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -320,21 +320,21 @@ define void @v_shuffle_v2f32_v3f32__5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -374,21 +374,21 @@ define void @v_shuffle_v2f32_v3f32__5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -422,18 +422,18 @@ define void @v_shuffle_v2f32_v3f32__5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -466,17 +466,17 @@ define void @v_shuffle_v2f32_v3f32__5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -510,18 +510,18 @@ define void @v_shuffle_v2f32_v3f32__5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -554,17 +554,17 @@ define void @v_shuffle_v2f32_v3f32__u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -596,17 +596,17 @@ define void @v_shuffle_v2f32_v3f32__0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <2 x i32> zeroinitializer store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -638,17 +638,17 @@ define void @v_shuffle_v2f32_v3f32__1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -681,18 +681,18 @@ define void @v_shuffle_v2f32_v3f32__2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -724,17 +724,17 @@ define void @v_shuffle_v2f32_v3f32__3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -772,21 +772,21 @@ define void @v_shuffle_v2f32_v3f32__4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -817,16 +817,16 @@ define void @v_shuffle_v2f32_v3f32__u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -856,16 +856,16 @@ define void @v_shuffle_v2f32_v3f32__0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -897,17 +897,17 @@ define void @v_shuffle_v2f32_v3f32__1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -939,17 +939,17 @@ define void @v_shuffle_v2f32_v3f32__2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -979,16 +979,16 @@ define void @v_shuffle_v2f32_v3f32__3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1026,21 +1026,21 @@ define void @v_shuffle_v2f32_v3f32__4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -1072,17 +1072,17 @@ define void @v_shuffle_v2f32_v3f32__u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1114,17 +1114,17 @@ define void @v_shuffle_v2f32_v3f32__0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1156,18 +1156,18 @@ define void @v_shuffle_v2f32_v3f32__1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1200,18 +1200,18 @@ define void @v_shuffle_v2f32_v3f32__2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1242,17 +1242,17 @@ define void @v_shuffle_v2f32_v3f32__3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1291,21 +1291,21 @@ define void @v_shuffle_v2f32_v3f32__4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -1347,16 +1347,16 @@ define void @v_shuffle_v2f32_v3f32__0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1387,17 +1387,17 @@ define void @v_shuffle_v2f32_v3f32__1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1429,17 +1429,17 @@ define void @v_shuffle_v2f32_v3f32__2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1482,17 +1482,17 @@ define void @v_shuffle_v2f32_v3f32__4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -1523,16 +1523,16 @@ define void @v_shuffle_v2f32_v3f32__u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -1571,21 +1571,21 @@ define void @v_shuffle_v2f32_v3f32__0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -1624,21 +1624,21 @@ define void @v_shuffle_v2f32_v3f32__1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v5, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v5, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -1677,21 +1677,21 @@ define void @v_shuffle_v2f32_v3f32__2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -1722,16 +1722,16 @@ define void @v_shuffle_v2f32_v3f32__3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -1764,17 +1764,17 @@ define void @v_shuffle_v2f32_v3f32__4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -1806,17 +1806,17 @@ define void @v_shuffle_v2f32_v3f32__u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -1855,21 +1855,21 @@ define void @v_shuffle_v2f32_v3f32__0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -1909,21 +1909,21 @@ define void @v_shuffle_v2f32_v3f32__1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -1963,21 +1963,21 @@ define void @v_shuffle_v2f32_v3f32__2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -2010,17 +2010,17 @@ define void @v_shuffle_v2f32_v3f32__3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -2053,18 +2053,18 @@ define void @v_shuffle_v2f32_v3f32__4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v3f32__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v3f32__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -2109,17 +2109,17 @@ define void @s_shuffle_v2f32_v3f32__0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v3f32__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v3f32__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -2151,17 +2151,17 @@ define void @s_shuffle_v2f32_v3f32__1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v3f32__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v3f32__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -2193,17 +2193,17 @@ define void @s_shuffle_v2f32_v3f32__2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v3f32__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v3f32__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -2249,17 +2249,17 @@ define void @s_shuffle_v2f32_v3f32__4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v3f32__4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v3f32__4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -2292,17 +2292,17 @@ define void @s_shuffle_v2f32_v3f32__5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v3f32__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v3f32__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -2343,21 +2343,21 @@ define void @s_shuffle_v2f32_v3f32__5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v3f32__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v3f32__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -2396,20 +2396,20 @@ define void @s_shuffle_v2f32_v3f32__5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v3f32__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v3f32__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -2450,21 +2450,21 @@ define void @s_shuffle_v2f32_v3f32__5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v3f32__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v3f32__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -2499,18 +2499,18 @@ define void @s_shuffle_v2f32_v3f32__5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v3f32__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v3f32__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -2564,18 +2564,18 @@ define void @s_shuffle_v2f32_v3f32__5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v3f32__5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v3f32__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -2608,17 +2608,17 @@ define void @s_shuffle_v2f32_v3f32__u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v3f32__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v3f32__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -2670,18 +2670,18 @@ define void @s_shuffle_v2f32_v3f32__1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v3f32__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v3f32__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -2715,18 +2715,18 @@ define void @s_shuffle_v2f32_v3f32__2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v3f32__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v3f32__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -2758,17 +2758,17 @@ define void @s_shuffle_v2f32_v3f32__3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v3f32__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v3f32__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -2808,21 +2808,21 @@ define void @s_shuffle_v2f32_v3f32__4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v3f32__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v3f32__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -2853,17 +2853,17 @@ define void @s_shuffle_v2f32_v3f32__u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v3f32__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v3f32__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -2893,17 +2893,17 @@ define void @s_shuffle_v2f32_v3f32__0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v3f32__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v3f32__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -2969,17 +2969,17 @@ define void @s_shuffle_v2f32_v3f32__3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v3f32__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v3f32__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -3017,20 +3017,20 @@ define void @s_shuffle_v2f32_v3f32__4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v3f32__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v3f32__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -3063,17 +3063,17 @@ define void @s_shuffle_v2f32_v3f32__u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v3f32__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v3f32__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -3125,18 +3125,18 @@ define void @s_shuffle_v2f32_v3f32__1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v3f32__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v3f32__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -3170,18 +3170,18 @@ define void @s_shuffle_v2f32_v3f32__2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v3f32__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v3f32__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -3213,17 +3213,17 @@ define void @s_shuffle_v2f32_v3f32__3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v3f32__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v3f32__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -3263,21 +3263,21 @@ define void @s_shuffle_v2f32_v3f32__4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v3f32__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v3f32__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -3322,17 +3322,17 @@ define void @s_shuffle_v2f32_v3f32__0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v3f32__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v3f32__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -3364,17 +3364,17 @@ define void @s_shuffle_v2f32_v3f32__1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v3f32__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v3f32__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -3406,17 +3406,17 @@ define void @s_shuffle_v2f32_v3f32__2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v3f32__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v3f32__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -3464,18 +3464,18 @@ define void @s_shuffle_v2f32_v3f32__4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v3f32__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v3f32__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -3506,17 +3506,17 @@ define void @s_shuffle_v2f32_v3f32__u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v3f32__u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v3f32__u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -3555,20 +3555,20 @@ define void @s_shuffle_v2f32_v3f32__0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v3f32__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v3f32__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -3607,20 +3607,20 @@ define void @s_shuffle_v2f32_v3f32__1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v3f32__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v3f32__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -3659,20 +3659,20 @@ define void @s_shuffle_v2f32_v3f32__2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v3f32__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v3f32__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -3703,17 +3703,17 @@ define void @s_shuffle_v2f32_v3f32__3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v3f32__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v3f32__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -3765,17 +3765,17 @@ define void @s_shuffle_v2f32_v3f32__u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v3f32__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v3f32__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -3814,20 +3814,20 @@ define void @s_shuffle_v2f32_v3f32__0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v3f32__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v3f32__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -3868,21 +3868,21 @@ define void @s_shuffle_v2f32_v3f32__1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v3f32__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v3f32__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -3923,21 +3923,21 @@ define void @s_shuffle_v2f32_v3f32__2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v3f32__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v3f32__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> @@ -3991,18 +3991,18 @@ define void @s_shuffle_v2f32_v3f32__4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v3f32__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v3f32__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <2 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v4f32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v4f32.ll index a312b40a99a81..07ca294019341 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v4f32.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v4f32.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v2f32_v4f32__u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v2f32_v4f32__0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -78,17 +78,17 @@ define void @v_shuffle_v2f32_v4f32__1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -118,16 +118,16 @@ define void @v_shuffle_v2f32_v4f32__2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -159,17 +159,17 @@ define void @v_shuffle_v2f32_v4f32__3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -211,17 +211,17 @@ define void @v_shuffle_v2f32_v4f32__5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -252,16 +252,16 @@ define void @v_shuffle_v2f32_v4f32__6_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -294,17 +294,17 @@ define void @v_shuffle_v2f32_v4f32__7_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -344,21 +344,21 @@ define void @v_shuffle_v2f32_v4f32__7_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -397,21 +397,21 @@ define void @v_shuffle_v2f32_v4f32__7_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -450,21 +450,21 @@ define void @v_shuffle_v2f32_v4f32__7_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -503,21 +503,21 @@ define void @v_shuffle_v2f32_v4f32__7_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -552,18 +552,18 @@ define void @v_shuffle_v2f32_v4f32__7_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -596,17 +596,17 @@ define void @v_shuffle_v2f32_v4f32__7_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -639,17 +639,17 @@ define void @v_shuffle_v2f32_v4f32__7_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -682,17 +682,17 @@ define void @v_shuffle_v2f32_v4f32__7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -725,17 +725,17 @@ define void @v_shuffle_v2f32_v4f32__u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -767,17 +767,17 @@ define void @v_shuffle_v2f32_v4f32__0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> zeroinitializer store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -809,17 +809,17 @@ define void @v_shuffle_v2f32_v4f32__1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -851,17 +851,17 @@ define void @v_shuffle_v2f32_v4f32__2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -895,18 +895,18 @@ define void @v_shuffle_v2f32_v4f32__3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -938,17 +938,17 @@ define void @v_shuffle_v2f32_v4f32__4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -986,21 +986,21 @@ define void @v_shuffle_v2f32_v4f32__5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -1039,21 +1039,21 @@ define void @v_shuffle_v2f32_v4f32__6_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -1084,16 +1084,16 @@ define void @v_shuffle_v2f32_v4f32__u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1123,16 +1123,16 @@ define void @v_shuffle_v2f32_v4f32__0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1164,17 +1164,17 @@ define void @v_shuffle_v2f32_v4f32__1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1206,17 +1206,17 @@ define void @v_shuffle_v2f32_v4f32__2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1248,17 +1248,17 @@ define void @v_shuffle_v2f32_v4f32__3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1288,16 +1288,16 @@ define void @v_shuffle_v2f32_v4f32__4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1335,21 +1335,21 @@ define void @v_shuffle_v2f32_v4f32__5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -1388,21 +1388,21 @@ define void @v_shuffle_v2f32_v4f32__6_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -1434,17 +1434,17 @@ define void @v_shuffle_v2f32_v4f32__u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1476,17 +1476,17 @@ define void @v_shuffle_v2f32_v4f32__0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1518,18 +1518,18 @@ define void @v_shuffle_v2f32_v4f32__1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1561,17 +1561,17 @@ define void @v_shuffle_v2f32_v4f32__2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1603,17 +1603,17 @@ define void @v_shuffle_v2f32_v4f32__3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1644,17 +1644,17 @@ define void @v_shuffle_v2f32_v4f32__4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1692,21 +1692,21 @@ define void @v_shuffle_v2f32_v4f32__5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -1745,21 +1745,21 @@ define void @v_shuffle_v2f32_v4f32__6_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -1790,16 +1790,16 @@ define void @v_shuffle_v2f32_v4f32__u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1831,17 +1831,17 @@ define void @v_shuffle_v2f32_v4f32__0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1873,17 +1873,17 @@ define void @v_shuffle_v2f32_v4f32__1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1913,16 +1913,16 @@ define void @v_shuffle_v2f32_v4f32__2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1954,17 +1954,17 @@ define void @v_shuffle_v2f32_v4f32__3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1994,16 +1994,16 @@ define void @v_shuffle_v2f32_v4f32__4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2041,21 +2041,21 @@ define void @v_shuffle_v2f32_v4f32__5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -2094,21 +2094,21 @@ define void @v_shuffle_v2f32_v4f32__6_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -2150,16 +2150,16 @@ define void @v_shuffle_v2f32_v4f32__0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2190,17 +2190,17 @@ define void @v_shuffle_v2f32_v4f32__1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2230,16 +2230,16 @@ define void @v_shuffle_v2f32_v4f32__2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2271,17 +2271,17 @@ define void @v_shuffle_v2f32_v4f32__3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2324,17 +2324,17 @@ define void @v_shuffle_v2f32_v4f32__5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -2367,17 +2367,17 @@ define void @v_shuffle_v2f32_v4f32__6_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -2408,16 +2408,16 @@ define void @v_shuffle_v2f32_v4f32__u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -2456,21 +2456,21 @@ define void @v_shuffle_v2f32_v4f32__0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -2509,21 +2509,21 @@ define void @v_shuffle_v2f32_v4f32__1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -2562,21 +2562,21 @@ define void @v_shuffle_v2f32_v4f32__2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -2615,21 +2615,21 @@ define void @v_shuffle_v2f32_v4f32__3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -2660,16 +2660,16 @@ define void @v_shuffle_v2f32_v4f32__4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -2702,17 +2702,17 @@ define void @v_shuffle_v2f32_v4f32__5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -2745,17 +2745,17 @@ define void @v_shuffle_v2f32_v4f32__6_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -2787,17 +2787,17 @@ define void @v_shuffle_v2f32_v4f32__u_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -2836,21 +2836,21 @@ define void @v_shuffle_v2f32_v4f32__0_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -2889,21 +2889,21 @@ define void @v_shuffle_v2f32_v4f32__1_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -2942,21 +2942,21 @@ define void @v_shuffle_v2f32_v4f32__2_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -2995,21 +2995,21 @@ define void @v_shuffle_v2f32_v4f32__3_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[6:7] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[6:7] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -3042,17 +3042,17 @@ define void @v_shuffle_v2f32_v4f32__4_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -3085,18 +3085,18 @@ define void @v_shuffle_v2f32_v4f32__5_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -3129,17 +3129,17 @@ define void @v_shuffle_v2f32_v4f32__6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -3170,16 +3170,16 @@ define void @v_shuffle_v2f32_v4f32__u_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -3218,21 +3218,21 @@ define void @v_shuffle_v2f32_v4f32__0_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -3271,21 +3271,21 @@ define void @v_shuffle_v2f32_v4f32__1_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -3324,21 +3324,21 @@ define void @v_shuffle_v2f32_v4f32__2_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -3377,21 +3377,21 @@ define void @v_shuffle_v2f32_v4f32__3_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -3424,17 +3424,17 @@ define void @v_shuffle_v2f32_v4f32__4_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -3467,17 +3467,17 @@ define void @v_shuffle_v2f32_v4f32__5_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -3508,16 +3508,16 @@ define void @v_shuffle_v2f32_v4f32__6_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v4f32__6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v4f32__6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -3562,17 +3562,17 @@ define void @s_shuffle_v2f32_v4f32__0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -3604,17 +3604,17 @@ define void @s_shuffle_v2f32_v4f32__1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -3646,17 +3646,17 @@ define void @s_shuffle_v2f32_v4f32__2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -3688,17 +3688,17 @@ define void @s_shuffle_v2f32_v4f32__3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -3744,17 +3744,17 @@ define void @s_shuffle_v2f32_v4f32__5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -3787,17 +3787,17 @@ define void @s_shuffle_v2f32_v4f32__6_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -3830,17 +3830,17 @@ define void @s_shuffle_v2f32_v4f32__7_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -3881,21 +3881,21 @@ define void @s_shuffle_v2f32_v4f32__7_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -3934,20 +3934,20 @@ define void @s_shuffle_v2f32_v4f32__7_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -3988,21 +3988,21 @@ define void @s_shuffle_v2f32_v4f32__7_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -4043,21 +4043,21 @@ define void @s_shuffle_v2f32_v4f32__7_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -4092,18 +4092,18 @@ define void @s_shuffle_v2f32_v4f32__7_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -4157,18 +4157,18 @@ define void @s_shuffle_v2f32_v4f32__7_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -4203,18 +4203,18 @@ define void @s_shuffle_v2f32_v4f32__7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -4247,17 +4247,17 @@ define void @s_shuffle_v2f32_v4f32__u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -4309,18 +4309,18 @@ define void @s_shuffle_v2f32_v4f32__1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -4354,18 +4354,18 @@ define void @s_shuffle_v2f32_v4f32__2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s0 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s0 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -4399,18 +4399,18 @@ define void @s_shuffle_v2f32_v4f32__3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -4442,17 +4442,17 @@ define void @s_shuffle_v2f32_v4f32__4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -4492,21 +4492,21 @@ define void @s_shuffle_v2f32_v4f32__5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -4547,21 +4547,21 @@ define void @s_shuffle_v2f32_v4f32__6_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s0 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s0 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -4592,17 +4592,17 @@ define void @s_shuffle_v2f32_v4f32__u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -4632,17 +4632,17 @@ define void @s_shuffle_v2f32_v4f32__0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -4694,18 +4694,18 @@ define void @s_shuffle_v2f32_v4f32__2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -4753,17 +4753,17 @@ define void @s_shuffle_v2f32_v4f32__4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -4801,20 +4801,20 @@ define void @s_shuffle_v2f32_v4f32__5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -4855,21 +4855,21 @@ define void @s_shuffle_v2f32_v4f32__6_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -4902,17 +4902,17 @@ define void @s_shuffle_v2f32_v4f32__u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -4964,18 +4964,18 @@ define void @s_shuffle_v2f32_v4f32__1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -5009,18 +5009,18 @@ define void @s_shuffle_v2f32_v4f32__2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s2 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s2 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -5054,18 +5054,18 @@ define void @s_shuffle_v2f32_v4f32__3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -5097,17 +5097,17 @@ define void @s_shuffle_v2f32_v4f32__4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -5147,21 +5147,21 @@ define void @s_shuffle_v2f32_v4f32__5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -5202,21 +5202,21 @@ define void @s_shuffle_v2f32_v4f32__6_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s2 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s2 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -5249,17 +5249,17 @@ define void @s_shuffle_v2f32_v4f32__u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -5311,18 +5311,18 @@ define void @s_shuffle_v2f32_v4f32__1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -5354,17 +5354,17 @@ define void @s_shuffle_v2f32_v4f32__2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -5398,18 +5398,18 @@ define void @s_shuffle_v2f32_v4f32__3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -5441,17 +5441,17 @@ define void @s_shuffle_v2f32_v4f32__4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -5491,21 +5491,21 @@ define void @s_shuffle_v2f32_v4f32__5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -5546,21 +5546,21 @@ define void @s_shuffle_v2f32_v4f32__6_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -5605,17 +5605,17 @@ define void @s_shuffle_v2f32_v4f32__0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -5647,17 +5647,17 @@ define void @s_shuffle_v2f32_v4f32__1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -5689,17 +5689,17 @@ define void @s_shuffle_v2f32_v4f32__2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -5731,17 +5731,17 @@ define void @s_shuffle_v2f32_v4f32__3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -5789,18 +5789,18 @@ define void @s_shuffle_v2f32_v4f32__5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -5835,18 +5835,18 @@ define void @s_shuffle_v2f32_v4f32__6_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s0 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s0 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -5877,17 +5877,17 @@ define void @s_shuffle_v2f32_v4f32__u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -5926,20 +5926,20 @@ define void @s_shuffle_v2f32_v4f32__0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -5978,20 +5978,20 @@ define void @s_shuffle_v2f32_v4f32__1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -6032,21 +6032,21 @@ define void @s_shuffle_v2f32_v4f32__2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -6085,20 +6085,20 @@ define void @s_shuffle_v2f32_v4f32__3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -6129,17 +6129,17 @@ define void @s_shuffle_v2f32_v4f32__4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -6193,18 +6193,18 @@ define void @s_shuffle_v2f32_v4f32__6_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -6237,17 +6237,17 @@ define void @s_shuffle_v2f32_v4f32__u_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -6286,20 +6286,20 @@ define void @s_shuffle_v2f32_v4f32__0_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -6340,21 +6340,21 @@ define void @s_shuffle_v2f32_v4f32__1_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -6395,21 +6395,21 @@ define void @s_shuffle_v2f32_v4f32__2_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s6 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s6 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -6450,21 +6450,21 @@ define void @s_shuffle_v2f32_v4f32__3_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -6518,18 +6518,18 @@ define void @s_shuffle_v2f32_v4f32__5_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -6564,18 +6564,18 @@ define void @s_shuffle_v2f32_v4f32__6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s2 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s2 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -6608,17 +6608,17 @@ define void @s_shuffle_v2f32_v4f32__u_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -6657,20 +6657,20 @@ define void @s_shuffle_v2f32_v4f32__0_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -6711,21 +6711,21 @@ define void @s_shuffle_v2f32_v4f32__1_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -6766,21 +6766,21 @@ define void @s_shuffle_v2f32_v4f32__2_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -6821,21 +6821,21 @@ define void @s_shuffle_v2f32_v4f32__3_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -6889,18 +6889,18 @@ define void @s_shuffle_v2f32_v4f32__5_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> @@ -6933,17 +6933,17 @@ define void @s_shuffle_v2f32_v4f32__6_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v4f32__6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v4f32__6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <2 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v8f32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v8f32.ll index 2568390d8d7a6..3deb23ca5314b 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v8f32.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v8f32.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v2f32_v8f32__u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v2f32_v8f32__0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -78,17 +78,17 @@ define void @v_shuffle_v2f32_v8f32__1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -118,16 +118,16 @@ define void @v_shuffle_v2f32_v8f32__2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -158,17 +158,17 @@ define void @v_shuffle_v2f32_v8f32__3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -198,16 +198,16 @@ define void @v_shuffle_v2f32_v8f32__4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -238,17 +238,17 @@ define void @v_shuffle_v2f32_v8f32__5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -278,16 +278,16 @@ define void @v_shuffle_v2f32_v8f32__6_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -319,17 +319,17 @@ define void @v_shuffle_v2f32_v8f32__7_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -371,17 +371,17 @@ define void @v_shuffle_v2f32_v8f32__9_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__9_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__9_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -412,16 +412,16 @@ define void @v_shuffle_v2f32_v8f32__10_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__10_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__10_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -453,17 +453,17 @@ define void @v_shuffle_v2f32_v8f32__11_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__11_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__11_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -494,16 +494,16 @@ define void @v_shuffle_v2f32_v8f32__12_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__12_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__12_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -535,17 +535,17 @@ define void @v_shuffle_v2f32_v8f32__13_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__13_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__13_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -576,16 +576,16 @@ define void @v_shuffle_v2f32_v8f32__14_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__14_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__14_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -618,17 +618,17 @@ define void @v_shuffle_v2f32_v8f32__15_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__15_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__15_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -668,21 +668,21 @@ define void @v_shuffle_v2f32_v8f32__15_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__15_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[8:9], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__15_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[8:9], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -721,21 +721,21 @@ define void @v_shuffle_v2f32_v8f32__15_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__15_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v9 -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__15_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v9 +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -774,21 +774,21 @@ define void @v_shuffle_v2f32_v8f32__15_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__15_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[10:11], v[2:3] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__15_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[10:11], v[2:3] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -827,21 +827,21 @@ define void @v_shuffle_v2f32_v8f32__15_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__15_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v11 -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__15_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v11 +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -880,21 +880,21 @@ define void @v_shuffle_v2f32_v8f32__15_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__15_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[12:13], v[4:5] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__15_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[12:13], v[4:5] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -933,21 +933,21 @@ define void @v_shuffle_v2f32_v8f32__15_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__15_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v13 -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__15_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v13 +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -986,21 +986,21 @@ define void @v_shuffle_v2f32_v8f32__15_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__15_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[14:15], v[6:7] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__15_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[14:15], v[6:7] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -1039,21 +1039,21 @@ define void @v_shuffle_v2f32_v8f32__15_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__15_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v15 -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__15_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v15 +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -1088,18 +1088,18 @@ define void @v_shuffle_v2f32_v8f32__15_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__15_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__15_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -1132,17 +1132,17 @@ define void @v_shuffle_v2f32_v8f32__15_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__15_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__15_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -1176,18 +1176,18 @@ define void @v_shuffle_v2f32_v8f32__15_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__15_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__15_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -1220,17 +1220,17 @@ define void @v_shuffle_v2f32_v8f32__15_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__15_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__15_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -1264,18 +1264,18 @@ define void @v_shuffle_v2f32_v8f32__15_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__15_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__15_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -1308,17 +1308,17 @@ define void @v_shuffle_v2f32_v8f32__15_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__15_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__15_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -1351,17 +1351,17 @@ define void @v_shuffle_v2f32_v8f32__15_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__15_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[6:7] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__15_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[6:7] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -1394,17 +1394,17 @@ define void @v_shuffle_v2f32_v8f32__15_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__15_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__15_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -1437,17 +1437,17 @@ define void @v_shuffle_v2f32_v8f32__u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1479,17 +1479,17 @@ define void @v_shuffle_v2f32_v8f32__0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> zeroinitializer store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1521,17 +1521,17 @@ define void @v_shuffle_v2f32_v8f32__1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1563,17 +1563,17 @@ define void @v_shuffle_v2f32_v8f32__2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1606,18 +1606,18 @@ define void @v_shuffle_v2f32_v8f32__3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1649,17 +1649,17 @@ define void @v_shuffle_v2f32_v8f32__4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1692,18 +1692,18 @@ define void @v_shuffle_v2f32_v8f32__5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1735,17 +1735,17 @@ define void @v_shuffle_v2f32_v8f32__6_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1779,18 +1779,18 @@ define void @v_shuffle_v2f32_v8f32__7_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1822,17 +1822,17 @@ define void @v_shuffle_v2f32_v8f32__8_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__8_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__8_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1870,21 +1870,21 @@ define void @v_shuffle_v2f32_v8f32__9_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__9_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__9_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -1923,21 +1923,21 @@ define void @v_shuffle_v2f32_v8f32__10_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__10_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__10_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -1976,21 +1976,21 @@ define void @v_shuffle_v2f32_v8f32__11_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__11_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__11_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -2029,21 +2029,21 @@ define void @v_shuffle_v2f32_v8f32__12_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__12_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v0 -; GFX940-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__12_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v0 +; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -2082,21 +2082,21 @@ define void @v_shuffle_v2f32_v8f32__13_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__13_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__13_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -2135,21 +2135,21 @@ define void @v_shuffle_v2f32_v8f32__14_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__14_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v9, v0 -; GFX940-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__14_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v9, v0 +; GFX942-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -2180,16 +2180,16 @@ define void @v_shuffle_v2f32_v8f32__u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2219,16 +2219,16 @@ define void @v_shuffle_v2f32_v8f32__0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2260,17 +2260,17 @@ define void @v_shuffle_v2f32_v8f32__1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2302,17 +2302,17 @@ define void @v_shuffle_v2f32_v8f32__2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2344,17 +2344,17 @@ define void @v_shuffle_v2f32_v8f32__3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2386,17 +2386,17 @@ define void @v_shuffle_v2f32_v8f32__4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2428,17 +2428,17 @@ define void @v_shuffle_v2f32_v8f32__5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2470,17 +2470,17 @@ define void @v_shuffle_v2f32_v8f32__6_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2512,17 +2512,17 @@ define void @v_shuffle_v2f32_v8f32__7_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2552,16 +2552,16 @@ define void @v_shuffle_v2f32_v8f32__8_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__8_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__8_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2599,21 +2599,21 @@ define void @v_shuffle_v2f32_v8f32__9_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__9_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__9_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -2652,21 +2652,21 @@ define void @v_shuffle_v2f32_v8f32__10_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__10_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__10_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -2705,21 +2705,21 @@ define void @v_shuffle_v2f32_v8f32__11_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__11_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__11_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -2758,21 +2758,21 @@ define void @v_shuffle_v2f32_v8f32__12_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__12_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__12_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -2811,21 +2811,21 @@ define void @v_shuffle_v2f32_v8f32__13_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__13_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__13_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -2864,21 +2864,21 @@ define void @v_shuffle_v2f32_v8f32__14_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__14_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v9, v1 -; GFX940-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__14_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v9, v1 +; GFX942-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -2910,17 +2910,17 @@ define void @v_shuffle_v2f32_v8f32__u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2952,17 +2952,17 @@ define void @v_shuffle_v2f32_v8f32__0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2994,18 +2994,18 @@ define void @v_shuffle_v2f32_v8f32__1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3037,17 +3037,17 @@ define void @v_shuffle_v2f32_v8f32__2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3079,17 +3079,17 @@ define void @v_shuffle_v2f32_v8f32__3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3121,17 +3121,17 @@ define void @v_shuffle_v2f32_v8f32__4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3164,18 +3164,18 @@ define void @v_shuffle_v2f32_v8f32__5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3207,17 +3207,17 @@ define void @v_shuffle_v2f32_v8f32__6_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3250,18 +3250,18 @@ define void @v_shuffle_v2f32_v8f32__7_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3292,17 +3292,17 @@ define void @v_shuffle_v2f32_v8f32__8_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__8_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__8_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3340,21 +3340,21 @@ define void @v_shuffle_v2f32_v8f32__9_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__9_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__9_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -3393,21 +3393,21 @@ define void @v_shuffle_v2f32_v8f32__10_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__10_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v2 -; GFX940-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__10_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v2 +; GFX942-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -3446,21 +3446,21 @@ define void @v_shuffle_v2f32_v8f32__11_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__11_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__11_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -3499,21 +3499,21 @@ define void @v_shuffle_v2f32_v8f32__12_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__12_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v9, v2 -; GFX940-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__12_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v9, v2 +; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -3552,21 +3552,21 @@ define void @v_shuffle_v2f32_v8f32__13_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__13_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[8:9], v[2:3] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__13_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[8:9], v[2:3] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -3605,21 +3605,21 @@ define void @v_shuffle_v2f32_v8f32__14_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__14_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v11, v2 -; GFX940-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__14_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v11, v2 +; GFX942-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -3650,16 +3650,16 @@ define void @v_shuffle_v2f32_v8f32__u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3691,17 +3691,17 @@ define void @v_shuffle_v2f32_v8f32__0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3733,17 +3733,17 @@ define void @v_shuffle_v2f32_v8f32__1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3773,16 +3773,16 @@ define void @v_shuffle_v2f32_v8f32__2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3814,17 +3814,17 @@ define void @v_shuffle_v2f32_v8f32__3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3856,17 +3856,17 @@ define void @v_shuffle_v2f32_v8f32__4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3898,17 +3898,17 @@ define void @v_shuffle_v2f32_v8f32__5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3940,17 +3940,17 @@ define void @v_shuffle_v2f32_v8f32__6_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3982,17 +3982,17 @@ define void @v_shuffle_v2f32_v8f32__7_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4022,16 +4022,16 @@ define void @v_shuffle_v2f32_v8f32__8_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__8_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__8_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4069,21 +4069,21 @@ define void @v_shuffle_v2f32_v8f32__9_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__9_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__9_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -4122,21 +4122,21 @@ define void @v_shuffle_v2f32_v8f32__10_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__10_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__10_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -4175,21 +4175,21 @@ define void @v_shuffle_v2f32_v8f32__11_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__11_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__11_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -4228,21 +4228,21 @@ define void @v_shuffle_v2f32_v8f32__12_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__12_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v9, v3 -; GFX940-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__12_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v9, v3 +; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -4281,21 +4281,21 @@ define void @v_shuffle_v2f32_v8f32__13_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__13_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v9 -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__13_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v9 +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -4334,21 +4334,21 @@ define void @v_shuffle_v2f32_v8f32__14_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__14_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v11, v3 -; GFX940-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__14_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v11, v3 +; GFX942-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -4380,17 +4380,17 @@ define void @v_shuffle_v2f32_v8f32__u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4422,17 +4422,17 @@ define void @v_shuffle_v2f32_v8f32__0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4465,18 +4465,18 @@ define void @v_shuffle_v2f32_v8f32__1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4508,17 +4508,17 @@ define void @v_shuffle_v2f32_v8f32__2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4550,18 +4550,18 @@ define void @v_shuffle_v2f32_v8f32__3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4593,17 +4593,17 @@ define void @v_shuffle_v2f32_v8f32__4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4635,17 +4635,17 @@ define void @v_shuffle_v2f32_v8f32__5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4677,17 +4677,17 @@ define void @v_shuffle_v2f32_v8f32__6_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4720,18 +4720,18 @@ define void @v_shuffle_v2f32_v8f32__7_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4762,17 +4762,17 @@ define void @v_shuffle_v2f32_v8f32__8_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__8_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__8_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4810,21 +4810,21 @@ define void @v_shuffle_v2f32_v8f32__9_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__9_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[4:5] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__9_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[4:5] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -4863,21 +4863,21 @@ define void @v_shuffle_v2f32_v8f32__10_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__10_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v9, v4 -; GFX940-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__10_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v9, v4 +; GFX942-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -4916,21 +4916,21 @@ define void @v_shuffle_v2f32_v8f32__11_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__11_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[8:9], v[4:5] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__11_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[8:9], v[4:5] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -4969,21 +4969,21 @@ define void @v_shuffle_v2f32_v8f32__12_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__12_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v11, v4 -; GFX940-NEXT: global_store_dwordx2 v14, v[10:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__12_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v11, v4 +; GFX942-NEXT: global_store_dwordx2 v14, v[10:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -5022,21 +5022,21 @@ define void @v_shuffle_v2f32_v8f32__13_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__13_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[10:11], v[4:5] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__13_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[10:11], v[4:5] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -5075,21 +5075,21 @@ define void @v_shuffle_v2f32_v8f32__14_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__14_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v13, v4 -; GFX940-NEXT: global_store_dwordx2 v14, v[12:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__14_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v13, v4 +; GFX942-NEXT: global_store_dwordx2 v14, v[12:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -5120,16 +5120,16 @@ define void @v_shuffle_v2f32_v8f32__u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5161,17 +5161,17 @@ define void @v_shuffle_v2f32_v8f32__0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5203,17 +5203,17 @@ define void @v_shuffle_v2f32_v8f32__1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5245,17 +5245,17 @@ define void @v_shuffle_v2f32_v8f32__2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5287,17 +5287,17 @@ define void @v_shuffle_v2f32_v8f32__3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5327,16 +5327,16 @@ define void @v_shuffle_v2f32_v8f32__4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5368,17 +5368,17 @@ define void @v_shuffle_v2f32_v8f32__5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5410,17 +5410,17 @@ define void @v_shuffle_v2f32_v8f32__6_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5452,17 +5452,17 @@ define void @v_shuffle_v2f32_v8f32__7_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5492,16 +5492,16 @@ define void @v_shuffle_v2f32_v8f32__8_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__8_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__8_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5539,21 +5539,21 @@ define void @v_shuffle_v2f32_v8f32__9_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__9_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__9_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -5592,21 +5592,21 @@ define void @v_shuffle_v2f32_v8f32__10_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__10_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v9, v5 -; GFX940-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__10_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v9, v5 +; GFX942-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -5645,21 +5645,21 @@ define void @v_shuffle_v2f32_v8f32__11_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__11_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v9 -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__11_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v9 +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -5698,21 +5698,21 @@ define void @v_shuffle_v2f32_v8f32__12_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__12_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v11, v5 -; GFX940-NEXT: global_store_dwordx2 v14, v[10:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__12_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v11, v5 +; GFX942-NEXT: global_store_dwordx2 v14, v[10:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -5751,21 +5751,21 @@ define void @v_shuffle_v2f32_v8f32__13_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__13_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v11 -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__13_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v11 +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -5804,21 +5804,21 @@ define void @v_shuffle_v2f32_v8f32__14_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__14_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v13, v5 -; GFX940-NEXT: global_store_dwordx2 v14, v[12:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__14_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v13, v5 +; GFX942-NEXT: global_store_dwordx2 v14, v[12:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -5850,17 +5850,17 @@ define void @v_shuffle_v2f32_v8f32__u_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5892,17 +5892,17 @@ define void @v_shuffle_v2f32_v8f32__0_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5935,18 +5935,18 @@ define void @v_shuffle_v2f32_v8f32__1_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5978,17 +5978,17 @@ define void @v_shuffle_v2f32_v8f32__2_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6021,18 +6021,18 @@ define void @v_shuffle_v2f32_v8f32__3_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6064,17 +6064,17 @@ define void @v_shuffle_v2f32_v8f32__4_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6106,18 +6106,18 @@ define void @v_shuffle_v2f32_v8f32__5_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6149,17 +6149,17 @@ define void @v_shuffle_v2f32_v8f32__6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6191,17 +6191,17 @@ define void @v_shuffle_v2f32_v8f32__7_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[6:7] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[6:7] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6232,17 +6232,17 @@ define void @v_shuffle_v2f32_v8f32__8_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__8_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__8_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6280,21 +6280,21 @@ define void @v_shuffle_v2f32_v8f32__9_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__9_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[8:9], v[6:7] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__9_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[8:9], v[6:7] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -6333,21 +6333,21 @@ define void @v_shuffle_v2f32_v8f32__10_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__10_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v11, v6 -; GFX940-NEXT: global_store_dwordx2 v16, v[10:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__10_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v11, v6 +; GFX942-NEXT: global_store_dwordx2 v16, v[10:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -6386,21 +6386,21 @@ define void @v_shuffle_v2f32_v8f32__11_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__11_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[10:11], v[6:7] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__11_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[10:11], v[6:7] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -6439,21 +6439,21 @@ define void @v_shuffle_v2f32_v8f32__12_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__12_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v13, v6 -; GFX940-NEXT: global_store_dwordx2 v16, v[12:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__12_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v13, v6 +; GFX942-NEXT: global_store_dwordx2 v16, v[12:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -6492,21 +6492,21 @@ define void @v_shuffle_v2f32_v8f32__13_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__13_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[12:13], v[6:7] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__13_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[12:13], v[6:7] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -6545,21 +6545,21 @@ define void @v_shuffle_v2f32_v8f32__14_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__14_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v15, v6 -; GFX940-NEXT: global_store_dwordx2 v16, v[14:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__14_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v15, v6 +; GFX942-NEXT: global_store_dwordx2 v16, v[14:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -6590,16 +6590,16 @@ define void @v_shuffle_v2f32_v8f32__u_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6631,17 +6631,17 @@ define void @v_shuffle_v2f32_v8f32__0_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6673,17 +6673,17 @@ define void @v_shuffle_v2f32_v8f32__1_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6715,17 +6715,17 @@ define void @v_shuffle_v2f32_v8f32__2_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6757,17 +6757,17 @@ define void @v_shuffle_v2f32_v8f32__3_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6799,17 +6799,17 @@ define void @v_shuffle_v2f32_v8f32__4_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6841,17 +6841,17 @@ define void @v_shuffle_v2f32_v8f32__5_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6881,16 +6881,16 @@ define void @v_shuffle_v2f32_v8f32__6_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6922,17 +6922,17 @@ define void @v_shuffle_v2f32_v8f32__7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6962,16 +6962,16 @@ define void @v_shuffle_v2f32_v8f32__8_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__8_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__8_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7009,21 +7009,21 @@ define void @v_shuffle_v2f32_v8f32__9_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__9_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v9 -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__9_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v9 +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -7062,21 +7062,21 @@ define void @v_shuffle_v2f32_v8f32__10_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__10_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v11, v7 -; GFX940-NEXT: global_store_dwordx2 v16, v[10:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__10_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v11, v7 +; GFX942-NEXT: global_store_dwordx2 v16, v[10:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -7115,21 +7115,21 @@ define void @v_shuffle_v2f32_v8f32__11_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__11_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v11 -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__11_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v11 +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -7168,21 +7168,21 @@ define void @v_shuffle_v2f32_v8f32__12_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__12_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v13, v7 -; GFX940-NEXT: global_store_dwordx2 v16, v[12:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__12_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v13, v7 +; GFX942-NEXT: global_store_dwordx2 v16, v[12:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -7221,21 +7221,21 @@ define void @v_shuffle_v2f32_v8f32__13_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__13_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v13 -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__13_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v13 +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -7274,21 +7274,21 @@ define void @v_shuffle_v2f32_v8f32__14_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__14_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v15, v7 -; GFX940-NEXT: global_store_dwordx2 v16, v[14:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__14_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v15, v7 +; GFX942-NEXT: global_store_dwordx2 v16, v[14:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -7330,16 +7330,16 @@ define void @v_shuffle_v2f32_v8f32__0_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__0_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__0_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7370,17 +7370,17 @@ define void @v_shuffle_v2f32_v8f32__1_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__1_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__1_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7410,16 +7410,16 @@ define void @v_shuffle_v2f32_v8f32__2_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__2_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__2_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7450,17 +7450,17 @@ define void @v_shuffle_v2f32_v8f32__3_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__3_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__3_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7490,16 +7490,16 @@ define void @v_shuffle_v2f32_v8f32__4_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__4_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__4_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7530,17 +7530,17 @@ define void @v_shuffle_v2f32_v8f32__5_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__5_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__5_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7570,16 +7570,16 @@ define void @v_shuffle_v2f32_v8f32__6_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__6_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__6_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7611,17 +7611,17 @@ define void @v_shuffle_v2f32_v8f32__7_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__7_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__7_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> store <2 x float> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7664,17 +7664,17 @@ define void @v_shuffle_v2f32_v8f32__9_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__9_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__9_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -7707,17 +7707,17 @@ define void @v_shuffle_v2f32_v8f32__10_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__10_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__10_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -7751,18 +7751,18 @@ define void @v_shuffle_v2f32_v8f32__11_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__11_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__11_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -7795,17 +7795,17 @@ define void @v_shuffle_v2f32_v8f32__12_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__12_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__12_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -7839,18 +7839,18 @@ define void @v_shuffle_v2f32_v8f32__13_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__13_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__13_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -7883,17 +7883,17 @@ define void @v_shuffle_v2f32_v8f32__14_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__14_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__14_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -7924,16 +7924,16 @@ define void @v_shuffle_v2f32_v8f32__u_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__u_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__u_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -7972,21 +7972,21 @@ define void @v_shuffle_v2f32_v8f32__0_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__0_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__0_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -8025,21 +8025,21 @@ define void @v_shuffle_v2f32_v8f32__1_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__1_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__1_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -8078,21 +8078,21 @@ define void @v_shuffle_v2f32_v8f32__2_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__2_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__2_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -8131,21 +8131,21 @@ define void @v_shuffle_v2f32_v8f32__3_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__3_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__3_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -8184,21 +8184,21 @@ define void @v_shuffle_v2f32_v8f32__4_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__4_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__4_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -8237,21 +8237,21 @@ define void @v_shuffle_v2f32_v8f32__5_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__5_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v5 -; GFX940-NEXT: global_store_dwordx2 v14, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__5_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v5 +; GFX942-NEXT: global_store_dwordx2 v14, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -8290,21 +8290,21 @@ define void @v_shuffle_v2f32_v8f32__6_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__6_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__6_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -8343,21 +8343,21 @@ define void @v_shuffle_v2f32_v8f32__7_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__7_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v7 -; GFX940-NEXT: global_store_dwordx2 v16, v[8:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__7_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v7 +; GFX942-NEXT: global_store_dwordx2 v16, v[8:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -8388,16 +8388,16 @@ define void @v_shuffle_v2f32_v8f32__8_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__8_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__8_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -8430,17 +8430,17 @@ define void @v_shuffle_v2f32_v8f32__9_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__9_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__9_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -8473,17 +8473,17 @@ define void @v_shuffle_v2f32_v8f32__10_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__10_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__10_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -8516,17 +8516,17 @@ define void @v_shuffle_v2f32_v8f32__11_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__11_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__11_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -8559,17 +8559,17 @@ define void @v_shuffle_v2f32_v8f32__12_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__12_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__12_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -8602,17 +8602,17 @@ define void @v_shuffle_v2f32_v8f32__13_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__13_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__13_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -8645,17 +8645,17 @@ define void @v_shuffle_v2f32_v8f32__14_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__14_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__14_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -8687,17 +8687,17 @@ define void @v_shuffle_v2f32_v8f32__u_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__u_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__u_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -8736,21 +8736,21 @@ define void @v_shuffle_v2f32_v8f32__0_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__0_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__0_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -8789,21 +8789,21 @@ define void @v_shuffle_v2f32_v8f32__1_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__1_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__1_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -8842,21 +8842,21 @@ define void @v_shuffle_v2f32_v8f32__2_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__2_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v6 -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__2_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v6 +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -8895,21 +8895,21 @@ define void @v_shuffle_v2f32_v8f32__3_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__3_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[6:7] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__3_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[6:7] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -8948,21 +8948,21 @@ define void @v_shuffle_v2f32_v8f32__4_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__4_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v8 -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__4_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v8 +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -9001,21 +9001,21 @@ define void @v_shuffle_v2f32_v8f32__5_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__5_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[8:9] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__5_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[8:9] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -9054,21 +9054,21 @@ define void @v_shuffle_v2f32_v8f32__6_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__6_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v10 -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__6_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v10 +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -9107,21 +9107,21 @@ define void @v_shuffle_v2f32_v8f32__7_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__7_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[10:11] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__7_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[10:11] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -9154,17 +9154,17 @@ define void @v_shuffle_v2f32_v8f32__8_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__8_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__8_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -9197,18 +9197,18 @@ define void @v_shuffle_v2f32_v8f32__9_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__9_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__9_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -9241,17 +9241,17 @@ define void @v_shuffle_v2f32_v8f32__10_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__10_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__10_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -9284,17 +9284,17 @@ define void @v_shuffle_v2f32_v8f32__11_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__11_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__11_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -9327,17 +9327,17 @@ define void @v_shuffle_v2f32_v8f32__12_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__12_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__12_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -9371,18 +9371,18 @@ define void @v_shuffle_v2f32_v8f32__13_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__13_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__13_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -9415,17 +9415,17 @@ define void @v_shuffle_v2f32_v8f32__14_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__14_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__14_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -9456,16 +9456,16 @@ define void @v_shuffle_v2f32_v8f32__u_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__u_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__u_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -9504,21 +9504,21 @@ define void @v_shuffle_v2f32_v8f32__0_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__0_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__0_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -9557,21 +9557,21 @@ define void @v_shuffle_v2f32_v8f32__1_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__1_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__1_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -9610,21 +9610,21 @@ define void @v_shuffle_v2f32_v8f32__2_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__2_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__2_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -9663,21 +9663,21 @@ define void @v_shuffle_v2f32_v8f32__3_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__3_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__3_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -9716,21 +9716,21 @@ define void @v_shuffle_v2f32_v8f32__4_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__4_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v9 -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__4_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v9 +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -9769,21 +9769,21 @@ define void @v_shuffle_v2f32_v8f32__5_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__5_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v5 -; GFX940-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__5_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v5 +; GFX942-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -9822,21 +9822,21 @@ define void @v_shuffle_v2f32_v8f32__6_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__6_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v11 -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__6_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v11 +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -9875,21 +9875,21 @@ define void @v_shuffle_v2f32_v8f32__7_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__7_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v7 -; GFX940-NEXT: global_store_dwordx2 v16, v[10:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__7_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v7 +; GFX942-NEXT: global_store_dwordx2 v16, v[10:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -9922,17 +9922,17 @@ define void @v_shuffle_v2f32_v8f32__8_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__8_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__8_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -9965,17 +9965,17 @@ define void @v_shuffle_v2f32_v8f32__9_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__9_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__9_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -10006,16 +10006,16 @@ define void @v_shuffle_v2f32_v8f32__10_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__10_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__10_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -10048,17 +10048,17 @@ define void @v_shuffle_v2f32_v8f32__11_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__11_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__11_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -10091,17 +10091,17 @@ define void @v_shuffle_v2f32_v8f32__12_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__12_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__12_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -10134,17 +10134,17 @@ define void @v_shuffle_v2f32_v8f32__13_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__13_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__13_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -10177,17 +10177,17 @@ define void @v_shuffle_v2f32_v8f32__14_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__14_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__14_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -10219,17 +10219,17 @@ define void @v_shuffle_v2f32_v8f32__u_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__u_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__u_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -10268,21 +10268,21 @@ define void @v_shuffle_v2f32_v8f32__0_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__0_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__0_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -10321,21 +10321,21 @@ define void @v_shuffle_v2f32_v8f32__1_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__1_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[6:7] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__1_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[6:7] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -10374,21 +10374,21 @@ define void @v_shuffle_v2f32_v8f32__2_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__2_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v8 -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__2_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v8 +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -10427,21 +10427,21 @@ define void @v_shuffle_v2f32_v8f32__3_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__3_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[8:9] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__3_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[8:9] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -10480,21 +10480,21 @@ define void @v_shuffle_v2f32_v8f32__4_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__4_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v10 -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__4_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v10 +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -10533,21 +10533,21 @@ define void @v_shuffle_v2f32_v8f32__5_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__5_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[10:11] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__5_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[10:11] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -10586,21 +10586,21 @@ define void @v_shuffle_v2f32_v8f32__6_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__6_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v12 -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__6_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v12 +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -10639,21 +10639,21 @@ define void @v_shuffle_v2f32_v8f32__7_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__7_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[12:13] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__7_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[12:13] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -10686,17 +10686,17 @@ define void @v_shuffle_v2f32_v8f32__8_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__8_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__8_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -10730,18 +10730,18 @@ define void @v_shuffle_v2f32_v8f32__9_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__9_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__9_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -10774,17 +10774,17 @@ define void @v_shuffle_v2f32_v8f32__10_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__10_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__10_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -10817,18 +10817,18 @@ define void @v_shuffle_v2f32_v8f32__11_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__11_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__11_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -10861,17 +10861,17 @@ define void @v_shuffle_v2f32_v8f32__12_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__12_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__12_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -10904,17 +10904,17 @@ define void @v_shuffle_v2f32_v8f32__13_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__13_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__13_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -10947,17 +10947,17 @@ define void @v_shuffle_v2f32_v8f32__14_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__14_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__14_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -10988,16 +10988,16 @@ define void @v_shuffle_v2f32_v8f32__u_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__u_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__u_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -11036,21 +11036,21 @@ define void @v_shuffle_v2f32_v8f32__0_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__0_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__0_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -11089,21 +11089,21 @@ define void @v_shuffle_v2f32_v8f32__1_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__1_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v1 -; GFX940-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__1_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v1 +; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -11142,21 +11142,21 @@ define void @v_shuffle_v2f32_v8f32__2_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__2_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__2_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -11195,21 +11195,21 @@ define void @v_shuffle_v2f32_v8f32__3_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__3_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v3 -; GFX940-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__3_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v3 +; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -11248,21 +11248,21 @@ define void @v_shuffle_v2f32_v8f32__4_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__4_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v11 -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__4_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v11 +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -11301,21 +11301,21 @@ define void @v_shuffle_v2f32_v8f32__5_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__5_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v5 -; GFX940-NEXT: global_store_dwordx2 v14, v[10:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__5_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v5 +; GFX942-NEXT: global_store_dwordx2 v14, v[10:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -11354,21 +11354,21 @@ define void @v_shuffle_v2f32_v8f32__6_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__6_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v13 -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__6_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v13 +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -11407,21 +11407,21 @@ define void @v_shuffle_v2f32_v8f32__7_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__7_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v7 -; GFX940-NEXT: global_store_dwordx2 v16, v[12:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__7_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v7 +; GFX942-NEXT: global_store_dwordx2 v16, v[12:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -11454,17 +11454,17 @@ define void @v_shuffle_v2f32_v8f32__8_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__8_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__8_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -11497,17 +11497,17 @@ define void @v_shuffle_v2f32_v8f32__9_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__9_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__9_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -11540,17 +11540,17 @@ define void @v_shuffle_v2f32_v8f32__10_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__10_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__10_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -11583,17 +11583,17 @@ define void @v_shuffle_v2f32_v8f32__11_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__11_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__11_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -11624,16 +11624,16 @@ define void @v_shuffle_v2f32_v8f32__12_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__12_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__12_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -11666,17 +11666,17 @@ define void @v_shuffle_v2f32_v8f32__13_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__13_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__13_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -11709,17 +11709,17 @@ define void @v_shuffle_v2f32_v8f32__14_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__14_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__14_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -11751,17 +11751,17 @@ define void @v_shuffle_v2f32_v8f32__u_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__u_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__u_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -11800,21 +11800,21 @@ define void @v_shuffle_v2f32_v8f32__0_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__0_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v8 -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__0_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v8 +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -11853,21 +11853,21 @@ define void @v_shuffle_v2f32_v8f32__1_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__1_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[8:9] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__1_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[8:9] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -11906,21 +11906,21 @@ define void @v_shuffle_v2f32_v8f32__2_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__2_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v10 -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__2_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v10 +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -11959,21 +11959,21 @@ define void @v_shuffle_v2f32_v8f32__3_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__3_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[10:11] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__3_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[10:11] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -12012,21 +12012,21 @@ define void @v_shuffle_v2f32_v8f32__4_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__4_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v12 -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__4_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v12 +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -12065,21 +12065,21 @@ define void @v_shuffle_v2f32_v8f32__5_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__5_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[12:13] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__5_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[12:13] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -12118,21 +12118,21 @@ define void @v_shuffle_v2f32_v8f32__6_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__6_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v14 -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__6_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v14 +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -12171,21 +12171,21 @@ define void @v_shuffle_v2f32_v8f32__7_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__7_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[14:15] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__7_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[14:15] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -12218,17 +12218,17 @@ define void @v_shuffle_v2f32_v8f32__8_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__8_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__8_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -12262,18 +12262,18 @@ define void @v_shuffle_v2f32_v8f32__9_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__9_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__9_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -12306,17 +12306,17 @@ define void @v_shuffle_v2f32_v8f32__10_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__10_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__10_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -12350,18 +12350,18 @@ define void @v_shuffle_v2f32_v8f32__11_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__11_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__11_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -12394,17 +12394,17 @@ define void @v_shuffle_v2f32_v8f32__12_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__12_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__12_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -12437,18 +12437,18 @@ define void @v_shuffle_v2f32_v8f32__13_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__13_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__13_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -12481,17 +12481,17 @@ define void @v_shuffle_v2f32_v8f32__14_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__14_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__14_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -12522,16 +12522,16 @@ define void @v_shuffle_v2f32_v8f32__u_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__u_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__u_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -12570,21 +12570,21 @@ define void @v_shuffle_v2f32_v8f32__0_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__0_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__0_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -12623,21 +12623,21 @@ define void @v_shuffle_v2f32_v8f32__1_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__1_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v1 -; GFX940-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__1_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v1 +; GFX942-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -12676,21 +12676,21 @@ define void @v_shuffle_v2f32_v8f32__2_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__2_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__2_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -12729,21 +12729,21 @@ define void @v_shuffle_v2f32_v8f32__3_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__3_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v3 -; GFX940-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__3_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v3 +; GFX942-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -12782,21 +12782,21 @@ define void @v_shuffle_v2f32_v8f32__4_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__4_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v13 -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__4_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v13 +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -12835,21 +12835,21 @@ define void @v_shuffle_v2f32_v8f32__5_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__5_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v5 -; GFX940-NEXT: global_store_dwordx2 v14, v[12:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__5_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v5 +; GFX942-NEXT: global_store_dwordx2 v14, v[12:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -12888,21 +12888,21 @@ define void @v_shuffle_v2f32_v8f32__6_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__6_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v15 -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__6_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v15 +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -12941,21 +12941,21 @@ define void @v_shuffle_v2f32_v8f32__7_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__7_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v14, v7 -; GFX940-NEXT: global_store_dwordx2 v16, v[14:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__7_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v14, v7 +; GFX942-NEXT: global_store_dwordx2 v16, v[14:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -12988,17 +12988,17 @@ define void @v_shuffle_v2f32_v8f32__8_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__8_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__8_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -13031,17 +13031,17 @@ define void @v_shuffle_v2f32_v8f32__9_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__9_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__9_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -13074,17 +13074,17 @@ define void @v_shuffle_v2f32_v8f32__10_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__10_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__10_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -13117,17 +13117,17 @@ define void @v_shuffle_v2f32_v8f32__11_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__11_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__11_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -13160,17 +13160,17 @@ define void @v_shuffle_v2f32_v8f32__12_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__12_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__12_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -13203,17 +13203,17 @@ define void @v_shuffle_v2f32_v8f32__13_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__13_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__13_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -13244,16 +13244,16 @@ define void @v_shuffle_v2f32_v8f32__14_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2f32_v8f32__14_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2f32_v8f32__14_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=v"() %vec1 = call <8 x float> asm "; def $0", "=v"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -13298,17 +13298,17 @@ define void @s_shuffle_v2f32_v8f32__0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -13340,17 +13340,17 @@ define void @s_shuffle_v2f32_v8f32__1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -13382,17 +13382,17 @@ define void @s_shuffle_v2f32_v8f32__2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -13424,17 +13424,17 @@ define void @s_shuffle_v2f32_v8f32__3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -13464,17 +13464,17 @@ define void @s_shuffle_v2f32_v8f32__4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -13506,17 +13506,17 @@ define void @s_shuffle_v2f32_v8f32__5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -13548,17 +13548,17 @@ define void @s_shuffle_v2f32_v8f32__6_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -13590,17 +13590,17 @@ define void @s_shuffle_v2f32_v8f32__7_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -13646,17 +13646,17 @@ define void @s_shuffle_v2f32_v8f32__9_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__9_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__9_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -13689,17 +13689,17 @@ define void @s_shuffle_v2f32_v8f32__10_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__10_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__10_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -13732,17 +13732,17 @@ define void @s_shuffle_v2f32_v8f32__11_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__11_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__11_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -13773,17 +13773,17 @@ define void @s_shuffle_v2f32_v8f32__12_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__12_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__12_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -13816,17 +13816,17 @@ define void @s_shuffle_v2f32_v8f32__13_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__13_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__13_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -13859,17 +13859,17 @@ define void @s_shuffle_v2f32_v8f32__14_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__14_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__14_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -13902,17 +13902,17 @@ define void @s_shuffle_v2f32_v8f32__15_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__15_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__15_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -13953,22 +13953,22 @@ define void @s_shuffle_v2f32_v8f32__15_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__15_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__15_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -14007,20 +14007,20 @@ define void @s_shuffle_v2f32_v8f32__15_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__15_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__15_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -14061,22 +14061,22 @@ define void @s_shuffle_v2f32_v8f32__15_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__15_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__15_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -14117,22 +14117,22 @@ define void @s_shuffle_v2f32_v8f32__15_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__15_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s11 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__15_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -14173,21 +14173,21 @@ define void @s_shuffle_v2f32_v8f32__15_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__15_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s15 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__15_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s15 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -14226,21 +14226,21 @@ define void @s_shuffle_v2f32_v8f32__15_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__15_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__15_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -14281,21 +14281,21 @@ define void @s_shuffle_v2f32_v8f32__15_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__15_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s15 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__15_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s15 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -14336,21 +14336,21 @@ define void @s_shuffle_v2f32_v8f32__15_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__15_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s15 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__15_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -14385,18 +14385,18 @@ define void @s_shuffle_v2f32_v8f32__15_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__15_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__15_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -14450,18 +14450,18 @@ define void @s_shuffle_v2f32_v8f32__15_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__15_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__15_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -14496,18 +14496,18 @@ define void @s_shuffle_v2f32_v8f32__15_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__15_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__15_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -14542,18 +14542,18 @@ define void @s_shuffle_v2f32_v8f32__15_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__15_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__15_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -14607,18 +14607,18 @@ define void @s_shuffle_v2f32_v8f32__15_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__15_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__15_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -14653,18 +14653,18 @@ define void @s_shuffle_v2f32_v8f32__15_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__15_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__15_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -14697,17 +14697,17 @@ define void @s_shuffle_v2f32_v8f32__u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -14759,18 +14759,18 @@ define void @s_shuffle_v2f32_v8f32__1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -14804,18 +14804,18 @@ define void @s_shuffle_v2f32_v8f32__2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s0 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s0 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -14849,18 +14849,18 @@ define void @s_shuffle_v2f32_v8f32__3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -14912,18 +14912,18 @@ define void @s_shuffle_v2f32_v8f32__5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -14957,18 +14957,18 @@ define void @s_shuffle_v2f32_v8f32__6_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s0 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s0 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -15002,18 +15002,18 @@ define void @s_shuffle_v2f32_v8f32__7_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -15045,17 +15045,17 @@ define void @s_shuffle_v2f32_v8f32__8_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__8_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__8_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -15095,22 +15095,22 @@ define void @s_shuffle_v2f32_v8f32__9_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__9_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__9_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -15151,22 +15151,22 @@ define void @s_shuffle_v2f32_v8f32__10_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__10_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s0 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__10_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s0 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -15207,22 +15207,22 @@ define void @s_shuffle_v2f32_v8f32__11_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__11_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__11_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -15261,21 +15261,21 @@ define void @s_shuffle_v2f32_v8f32__12_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__12_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__12_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -15316,22 +15316,22 @@ define void @s_shuffle_v2f32_v8f32__13_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__13_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s9 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__13_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s9 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -15372,22 +15372,22 @@ define void @s_shuffle_v2f32_v8f32__14_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__14_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: s_mov_b64 s[8:9], s[10:11] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__14_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -15418,17 +15418,17 @@ define void @s_shuffle_v2f32_v8f32__u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -15458,17 +15458,17 @@ define void @s_shuffle_v2f32_v8f32__0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -15520,18 +15520,18 @@ define void @s_shuffle_v2f32_v8f32__2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -15619,18 +15619,18 @@ define void @s_shuffle_v2f32_v8f32__6_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -15678,17 +15678,17 @@ define void @s_shuffle_v2f32_v8f32__8_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__8_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__8_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -15726,20 +15726,20 @@ define void @s_shuffle_v2f32_v8f32__9_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__9_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__9_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -15780,22 +15780,22 @@ define void @s_shuffle_v2f32_v8f32__10_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__10_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__10_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -15834,20 +15834,20 @@ define void @s_shuffle_v2f32_v8f32__11_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__11_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__11_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -15886,21 +15886,21 @@ define void @s_shuffle_v2f32_v8f32__12_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__12_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__12_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -15939,20 +15939,20 @@ define void @s_shuffle_v2f32_v8f32__13_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__13_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__13_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -15993,22 +15993,22 @@ define void @s_shuffle_v2f32_v8f32__14_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__14_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[10:11] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__14_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -16041,17 +16041,17 @@ define void @s_shuffle_v2f32_v8f32__u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -16103,18 +16103,18 @@ define void @s_shuffle_v2f32_v8f32__1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -16148,18 +16148,18 @@ define void @s_shuffle_v2f32_v8f32__2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s2 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s2 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -16193,18 +16193,18 @@ define void @s_shuffle_v2f32_v8f32__3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -16256,18 +16256,18 @@ define void @s_shuffle_v2f32_v8f32__5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -16301,18 +16301,18 @@ define void @s_shuffle_v2f32_v8f32__6_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s2 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s2 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -16346,18 +16346,18 @@ define void @s_shuffle_v2f32_v8f32__7_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -16389,17 +16389,17 @@ define void @s_shuffle_v2f32_v8f32__8_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__8_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__8_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -16439,22 +16439,22 @@ define void @s_shuffle_v2f32_v8f32__9_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__9_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__9_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -16495,22 +16495,22 @@ define void @s_shuffle_v2f32_v8f32__10_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__10_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s2 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__10_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s2 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -16551,22 +16551,22 @@ define void @s_shuffle_v2f32_v8f32__11_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__11_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__11_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -16605,21 +16605,21 @@ define void @s_shuffle_v2f32_v8f32__12_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__12_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__12_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -16660,22 +16660,22 @@ define void @s_shuffle_v2f32_v8f32__13_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__13_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s9 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__13_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s9 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -16716,22 +16716,22 @@ define void @s_shuffle_v2f32_v8f32__14_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__14_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: s_mov_b64 s[8:9], s[10:11] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__14_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -16764,17 +16764,17 @@ define void @s_shuffle_v2f32_v8f32__u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -16826,18 +16826,18 @@ define void @s_shuffle_v2f32_v8f32__1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -16869,17 +16869,17 @@ define void @s_shuffle_v2f32_v8f32__2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -16913,18 +16913,18 @@ define void @s_shuffle_v2f32_v8f32__3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -16976,18 +16976,18 @@ define void @s_shuffle_v2f32_v8f32__5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -17021,18 +17021,18 @@ define void @s_shuffle_v2f32_v8f32__6_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -17066,18 +17066,18 @@ define void @s_shuffle_v2f32_v8f32__7_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -17109,17 +17109,17 @@ define void @s_shuffle_v2f32_v8f32__8_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__8_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__8_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -17159,22 +17159,22 @@ define void @s_shuffle_v2f32_v8f32__9_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__9_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__9_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -17215,22 +17215,22 @@ define void @s_shuffle_v2f32_v8f32__10_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__10_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__10_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -17271,22 +17271,22 @@ define void @s_shuffle_v2f32_v8f32__11_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__11_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__11_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -17325,21 +17325,21 @@ define void @s_shuffle_v2f32_v8f32__12_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__12_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__12_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -17380,22 +17380,22 @@ define void @s_shuffle_v2f32_v8f32__13_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__13_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s9 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__13_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s9 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -17436,22 +17436,22 @@ define void @s_shuffle_v2f32_v8f32__14_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__14_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[10:11] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__14_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -17484,17 +17484,17 @@ define void @s_shuffle_v2f32_v8f32__u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -17546,18 +17546,18 @@ define void @s_shuffle_v2f32_v8f32__1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -17591,18 +17591,18 @@ define void @s_shuffle_v2f32_v8f32__2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s4 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s4 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -17636,18 +17636,18 @@ define void @s_shuffle_v2f32_v8f32__3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -17699,18 +17699,18 @@ define void @s_shuffle_v2f32_v8f32__5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -17744,18 +17744,18 @@ define void @s_shuffle_v2f32_v8f32__6_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s4 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s4 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -17789,18 +17789,18 @@ define void @s_shuffle_v2f32_v8f32__7_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -17832,17 +17832,17 @@ define void @s_shuffle_v2f32_v8f32__8_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__8_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__8_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -17882,21 +17882,21 @@ define void @s_shuffle_v2f32_v8f32__9_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__9_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s9 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__9_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s9 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -17937,21 +17937,21 @@ define void @s_shuffle_v2f32_v8f32__10_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__10_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s4 -; GFX940-NEXT: s_mov_b64 s[8:9], s[10:11] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__10_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s4 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -17992,21 +17992,21 @@ define void @s_shuffle_v2f32_v8f32__11_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__11_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__11_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -18045,21 +18045,21 @@ define void @s_shuffle_v2f32_v8f32__12_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__12_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s12 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__12_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s12 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -18100,21 +18100,21 @@ define void @s_shuffle_v2f32_v8f32__13_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__13_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s13 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__13_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s13 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -18155,21 +18155,21 @@ define void @s_shuffle_v2f32_v8f32__14_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__14_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s15, s4 -; GFX940-NEXT: s_mov_b64 s[8:9], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__14_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s15, s4 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -18200,17 +18200,17 @@ define void @s_shuffle_v2f32_v8f32__u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -18280,18 +18280,18 @@ define void @s_shuffle_v2f32_v8f32__2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -18339,17 +18339,17 @@ define void @s_shuffle_v2f32_v8f32__4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -18401,18 +18401,18 @@ define void @s_shuffle_v2f32_v8f32__6_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -18460,17 +18460,17 @@ define void @s_shuffle_v2f32_v8f32__8_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__8_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__8_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -18508,21 +18508,21 @@ define void @s_shuffle_v2f32_v8f32__9_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__9_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__9_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -18563,21 +18563,21 @@ define void @s_shuffle_v2f32_v8f32__10_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__10_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[10:11] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__10_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -18616,21 +18616,21 @@ define void @s_shuffle_v2f32_v8f32__11_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__11_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__11_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -18669,21 +18669,21 @@ define void @s_shuffle_v2f32_v8f32__12_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__12_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__12_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -18722,21 +18722,21 @@ define void @s_shuffle_v2f32_v8f32__13_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__13_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__13_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -18777,21 +18777,21 @@ define void @s_shuffle_v2f32_v8f32__14_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__14_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__14_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -18824,17 +18824,17 @@ define void @s_shuffle_v2f32_v8f32__u_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -18886,18 +18886,18 @@ define void @s_shuffle_v2f32_v8f32__1_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -18931,18 +18931,18 @@ define void @s_shuffle_v2f32_v8f32__2_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s6 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s6 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -18976,18 +18976,18 @@ define void @s_shuffle_v2f32_v8f32__3_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -19039,18 +19039,18 @@ define void @s_shuffle_v2f32_v8f32__5_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -19084,18 +19084,18 @@ define void @s_shuffle_v2f32_v8f32__6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s6 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s6 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -19129,18 +19129,18 @@ define void @s_shuffle_v2f32_v8f32__7_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -19172,17 +19172,17 @@ define void @s_shuffle_v2f32_v8f32__8_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__8_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__8_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -19222,21 +19222,21 @@ define void @s_shuffle_v2f32_v8f32__9_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__9_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s9 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__9_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s9 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -19277,21 +19277,21 @@ define void @s_shuffle_v2f32_v8f32__10_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__10_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s6 -; GFX940-NEXT: s_mov_b64 s[8:9], s[10:11] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__10_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s6 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -19332,21 +19332,21 @@ define void @s_shuffle_v2f32_v8f32__11_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__11_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__11_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -19385,21 +19385,21 @@ define void @s_shuffle_v2f32_v8f32__12_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__12_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s14 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__12_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s14 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -19440,21 +19440,21 @@ define void @s_shuffle_v2f32_v8f32__13_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__13_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s13 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__13_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s13 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -19495,21 +19495,21 @@ define void @s_shuffle_v2f32_v8f32__14_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__14_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s15, s6 -; GFX940-NEXT: s_mov_b64 s[8:9], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__14_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s15, s6 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -19542,17 +19542,17 @@ define void @s_shuffle_v2f32_v8f32__u_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -19604,18 +19604,18 @@ define void @s_shuffle_v2f32_v8f32__1_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -19649,18 +19649,18 @@ define void @s_shuffle_v2f32_v8f32__2_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -19694,18 +19694,18 @@ define void @s_shuffle_v2f32_v8f32__3_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -19757,18 +19757,18 @@ define void @s_shuffle_v2f32_v8f32__5_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -19800,17 +19800,17 @@ define void @s_shuffle_v2f32_v8f32__6_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -19844,18 +19844,18 @@ define void @s_shuffle_v2f32_v8f32__7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -19887,17 +19887,17 @@ define void @s_shuffle_v2f32_v8f32__8_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__8_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__8_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -19937,21 +19937,21 @@ define void @s_shuffle_v2f32_v8f32__9_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__9_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s9 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__9_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s9 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -19992,21 +19992,21 @@ define void @s_shuffle_v2f32_v8f32__10_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__10_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[10:11] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__10_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -20047,21 +20047,21 @@ define void @s_shuffle_v2f32_v8f32__11_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__11_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s11 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__11_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -20100,21 +20100,21 @@ define void @s_shuffle_v2f32_v8f32__12_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__12_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__12_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -20155,21 +20155,21 @@ define void @s_shuffle_v2f32_v8f32__13_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__13_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s13 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__13_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -20210,21 +20210,21 @@ define void @s_shuffle_v2f32_v8f32__14_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__14_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s15, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__14_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -20269,17 +20269,17 @@ define void @s_shuffle_v2f32_v8f32__0_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__0_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__0_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -20311,17 +20311,17 @@ define void @s_shuffle_v2f32_v8f32__1_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__1_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__1_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -20353,17 +20353,17 @@ define void @s_shuffle_v2f32_v8f32__2_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__2_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__2_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -20395,17 +20395,17 @@ define void @s_shuffle_v2f32_v8f32__3_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__3_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__3_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -20435,17 +20435,17 @@ define void @s_shuffle_v2f32_v8f32__4_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__4_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__4_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -20477,17 +20477,17 @@ define void @s_shuffle_v2f32_v8f32__5_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__5_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__5_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -20519,17 +20519,17 @@ define void @s_shuffle_v2f32_v8f32__6_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__6_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__6_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -20561,17 +20561,17 @@ define void @s_shuffle_v2f32_v8f32__7_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__7_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__7_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x float> %shuf) @@ -20619,18 +20619,18 @@ define void @s_shuffle_v2f32_v8f32__9_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__9_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__9_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -20665,18 +20665,18 @@ define void @s_shuffle_v2f32_v8f32__10_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__10_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s0 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__10_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s0 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -20711,18 +20711,18 @@ define void @s_shuffle_v2f32_v8f32__11_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__11_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__11_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -20776,18 +20776,18 @@ define void @s_shuffle_v2f32_v8f32__13_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__13_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__13_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -20822,18 +20822,18 @@ define void @s_shuffle_v2f32_v8f32__14_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__14_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s0 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__14_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s0 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -20864,17 +20864,17 @@ define void @s_shuffle_v2f32_v8f32__u_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__u_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__u_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -20913,20 +20913,20 @@ define void @s_shuffle_v2f32_v8f32__0_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__0_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__0_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -20965,20 +20965,20 @@ define void @s_shuffle_v2f32_v8f32__1_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__1_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__1_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -21019,22 +21019,22 @@ define void @s_shuffle_v2f32_v8f32__2_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__2_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__2_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -21073,20 +21073,20 @@ define void @s_shuffle_v2f32_v8f32__3_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__3_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__3_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -21125,21 +21125,21 @@ define void @s_shuffle_v2f32_v8f32__4_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__4_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__4_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -21178,20 +21178,20 @@ define void @s_shuffle_v2f32_v8f32__5_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__5_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__5_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -21232,21 +21232,21 @@ define void @s_shuffle_v2f32_v8f32__6_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__6_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s9 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__6_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s9 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -21285,20 +21285,20 @@ define void @s_shuffle_v2f32_v8f32__7_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__7_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__7_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -21329,17 +21329,17 @@ define void @s_shuffle_v2f32_v8f32__8_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__8_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__8_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -21393,18 +21393,18 @@ define void @s_shuffle_v2f32_v8f32__10_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__10_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__10_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -21496,18 +21496,18 @@ define void @s_shuffle_v2f32_v8f32__14_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__14_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__14_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -21540,17 +21540,17 @@ define void @s_shuffle_v2f32_v8f32__u_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__u_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__u_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -21589,20 +21589,20 @@ define void @s_shuffle_v2f32_v8f32__0_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__0_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__0_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -21643,22 +21643,22 @@ define void @s_shuffle_v2f32_v8f32__1_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__1_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__1_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -21699,22 +21699,22 @@ define void @s_shuffle_v2f32_v8f32__2_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__2_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s6 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__2_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s6 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -21755,22 +21755,22 @@ define void @s_shuffle_v2f32_v8f32__3_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__3_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__3_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -21809,21 +21809,21 @@ define void @s_shuffle_v2f32_v8f32__4_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__4_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__4_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -21864,21 +21864,21 @@ define void @s_shuffle_v2f32_v8f32__5_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__5_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__5_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -21919,21 +21919,21 @@ define void @s_shuffle_v2f32_v8f32__6_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__6_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s10 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__6_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s10 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -21974,21 +21974,21 @@ define void @s_shuffle_v2f32_v8f32__7_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__7_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__7_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -22042,18 +22042,18 @@ define void @s_shuffle_v2f32_v8f32__9_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__9_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__9_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -22088,18 +22088,18 @@ define void @s_shuffle_v2f32_v8f32__10_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__10_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s2 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__10_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s2 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -22134,18 +22134,18 @@ define void @s_shuffle_v2f32_v8f32__11_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__11_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__11_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -22199,18 +22199,18 @@ define void @s_shuffle_v2f32_v8f32__13_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__13_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__13_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -22245,18 +22245,18 @@ define void @s_shuffle_v2f32_v8f32__14_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__14_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s2 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__14_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s2 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -22289,17 +22289,17 @@ define void @s_shuffle_v2f32_v8f32__u_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__u_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__u_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -22338,20 +22338,20 @@ define void @s_shuffle_v2f32_v8f32__0_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__0_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__0_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -22392,22 +22392,22 @@ define void @s_shuffle_v2f32_v8f32__1_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__1_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__1_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -22448,22 +22448,22 @@ define void @s_shuffle_v2f32_v8f32__2_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__2_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__2_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -22504,22 +22504,22 @@ define void @s_shuffle_v2f32_v8f32__3_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__3_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__3_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -22558,21 +22558,21 @@ define void @s_shuffle_v2f32_v8f32__4_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__4_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__4_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -22613,21 +22613,21 @@ define void @s_shuffle_v2f32_v8f32__5_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__5_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[10:11] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__5_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -22668,21 +22668,21 @@ define void @s_shuffle_v2f32_v8f32__6_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__6_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s11 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__6_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -22723,21 +22723,21 @@ define void @s_shuffle_v2f32_v8f32__7_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__7_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[10:11] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__7_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -22791,18 +22791,18 @@ define void @s_shuffle_v2f32_v8f32__9_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__9_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__9_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -22835,17 +22835,17 @@ define void @s_shuffle_v2f32_v8f32__10_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__10_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__10_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -22880,18 +22880,18 @@ define void @s_shuffle_v2f32_v8f32__11_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__11_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__11_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -22945,18 +22945,18 @@ define void @s_shuffle_v2f32_v8f32__13_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__13_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__13_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -22991,18 +22991,18 @@ define void @s_shuffle_v2f32_v8f32__14_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__14_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__14_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -23035,17 +23035,17 @@ define void @s_shuffle_v2f32_v8f32__u_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__u_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__u_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -23084,20 +23084,20 @@ define void @s_shuffle_v2f32_v8f32__0_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__0_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__0_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -23138,21 +23138,21 @@ define void @s_shuffle_v2f32_v8f32__1_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__1_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s12 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__1_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s12 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -23193,22 +23193,22 @@ define void @s_shuffle_v2f32_v8f32__2_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__2_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s8 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__2_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s8 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -23249,21 +23249,21 @@ define void @s_shuffle_v2f32_v8f32__3_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__3_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s12 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__3_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s12 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -23302,21 +23302,21 @@ define void @s_shuffle_v2f32_v8f32__4_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__4_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__4_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -23357,21 +23357,21 @@ define void @s_shuffle_v2f32_v8f32__5_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__5_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s12 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__5_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s12 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -23412,21 +23412,21 @@ define void @s_shuffle_v2f32_v8f32__6_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__6_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s12 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__6_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s12 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -23467,21 +23467,21 @@ define void @s_shuffle_v2f32_v8f32__7_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__7_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s12 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__7_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s12 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -23535,18 +23535,18 @@ define void @s_shuffle_v2f32_v8f32__9_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__9_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__9_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -23581,18 +23581,18 @@ define void @s_shuffle_v2f32_v8f32__10_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__10_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s4 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__10_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s4 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -23627,18 +23627,18 @@ define void @s_shuffle_v2f32_v8f32__11_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__11_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__11_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -23692,18 +23692,18 @@ define void @s_shuffle_v2f32_v8f32__13_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__13_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__13_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -23738,18 +23738,18 @@ define void @s_shuffle_v2f32_v8f32__14_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__14_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s4 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__14_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s4 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -23780,17 +23780,17 @@ define void @s_shuffle_v2f32_v8f32__u_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__u_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__u_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -23829,20 +23829,20 @@ define void @s_shuffle_v2f32_v8f32__0_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__0_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__0_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -23881,21 +23881,21 @@ define void @s_shuffle_v2f32_v8f32__1_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__1_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__1_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -23936,22 +23936,22 @@ define void @s_shuffle_v2f32_v8f32__2_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__2_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s9 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__2_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s9 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -23990,21 +23990,21 @@ define void @s_shuffle_v2f32_v8f32__3_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__3_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__3_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -24043,21 +24043,21 @@ define void @s_shuffle_v2f32_v8f32__4_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__4_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__4_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -24096,21 +24096,21 @@ define void @s_shuffle_v2f32_v8f32__5_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__5_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__5_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -24151,21 +24151,21 @@ define void @s_shuffle_v2f32_v8f32__6_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__6_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s13 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__6_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -24204,21 +24204,21 @@ define void @s_shuffle_v2f32_v8f32__7_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__7_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__7_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -24291,18 +24291,18 @@ define void @s_shuffle_v2f32_v8f32__10_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__10_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__10_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -24352,17 +24352,17 @@ define void @s_shuffle_v2f32_v8f32__12_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__12_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__12_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -24416,18 +24416,18 @@ define void @s_shuffle_v2f32_v8f32__14_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__14_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__14_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -24460,17 +24460,17 @@ define void @s_shuffle_v2f32_v8f32__u_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__u_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__u_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -24509,20 +24509,20 @@ define void @s_shuffle_v2f32_v8f32__0_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__0_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__0_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -24563,22 +24563,22 @@ define void @s_shuffle_v2f32_v8f32__1_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__1_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__1_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -24619,22 +24619,22 @@ define void @s_shuffle_v2f32_v8f32__2_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__2_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s10 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__2_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s10 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -24675,22 +24675,22 @@ define void @s_shuffle_v2f32_v8f32__3_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__3_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__3_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -24729,21 +24729,21 @@ define void @s_shuffle_v2f32_v8f32__4_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__4_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__4_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -24784,21 +24784,21 @@ define void @s_shuffle_v2f32_v8f32__5_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__5_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s14 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__5_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s14 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -24839,21 +24839,21 @@ define void @s_shuffle_v2f32_v8f32__6_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__6_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s14 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__6_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s14 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -24894,21 +24894,21 @@ define void @s_shuffle_v2f32_v8f32__7_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__7_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s14 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__7_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s14 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -24962,18 +24962,18 @@ define void @s_shuffle_v2f32_v8f32__9_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__9_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__9_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -25008,18 +25008,18 @@ define void @s_shuffle_v2f32_v8f32__10_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__10_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s6 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__10_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s6 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -25054,18 +25054,18 @@ define void @s_shuffle_v2f32_v8f32__11_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__11_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__11_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -25119,18 +25119,18 @@ define void @s_shuffle_v2f32_v8f32__13_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__13_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__13_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -25165,18 +25165,18 @@ define void @s_shuffle_v2f32_v8f32__14_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__14_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s6 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__14_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s6 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -25209,17 +25209,17 @@ define void @s_shuffle_v2f32_v8f32__u_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__u_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__u_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -25258,20 +25258,20 @@ define void @s_shuffle_v2f32_v8f32__0_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__0_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__0_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -25312,22 +25312,22 @@ define void @s_shuffle_v2f32_v8f32__1_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__1_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[10:11] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__1_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -25368,22 +25368,22 @@ define void @s_shuffle_v2f32_v8f32__2_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__2_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s11 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__2_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -25424,22 +25424,22 @@ define void @s_shuffle_v2f32_v8f32__3_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__3_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[10:11] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__3_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -25478,21 +25478,21 @@ define void @s_shuffle_v2f32_v8f32__4_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__4_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__4_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -25533,21 +25533,21 @@ define void @s_shuffle_v2f32_v8f32__5_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__5_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__5_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -25588,21 +25588,21 @@ define void @s_shuffle_v2f32_v8f32__6_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__6_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s15 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__6_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -25643,21 +25643,21 @@ define void @s_shuffle_v2f32_v8f32__7_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__7_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__7_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -25711,18 +25711,18 @@ define void @s_shuffle_v2f32_v8f32__9_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__9_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__9_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -25757,18 +25757,18 @@ define void @s_shuffle_v2f32_v8f32__10_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__10_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__10_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -25803,18 +25803,18 @@ define void @s_shuffle_v2f32_v8f32__11_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__11_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__11_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -25868,18 +25868,18 @@ define void @s_shuffle_v2f32_v8f32__13_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__13_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__13_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> @@ -25912,17 +25912,17 @@ define void @s_shuffle_v2f32_v8f32__14_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2f32_v8f32__14_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2f32_v8f32__14_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x float> asm "; def $0", "=s"() %vec1 = call <8 x float> asm "; def $0", "=s"() %shuf = shufflevector <8 x float> %vec0, <8 x float> %vec1, <2 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i16.v2i16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i16.v2i16.ll index cef92455bd8dd..027a9e371841c 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i16.v2i16.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i16.v2i16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v2i16_v2i16__u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v2i16_v2i16__0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v2i16__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v2i16__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -79,17 +79,17 @@ define void @v_shuffle_v2i16_v2i16__1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v2i16__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v2i16__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v1, s0, v1, 16 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -132,17 +132,17 @@ define void @v_shuffle_v2i16_v2i16__3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v2i16__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v2i16__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v1, s0, v1, 16 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> @@ -181,21 +181,21 @@ define void @v_shuffle_v2i16_v2i16__3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v2i16__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v2, 16 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v2i16__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v2, 16 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> @@ -236,22 +236,22 @@ define void @v_shuffle_v2i16_v2i16__3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v2i16__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v2, s2 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v2i16__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v2, s2 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> @@ -284,17 +284,17 @@ define void @v_shuffle_v2i16_v2i16__3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v2i16__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v2i16__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v1, 16 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> @@ -329,18 +329,18 @@ define void @v_shuffle_v2i16_v2i16__3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v2i16__3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v2i16__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> @@ -373,17 +373,17 @@ define void @v_shuffle_v2i16_v2i16__u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v2i16__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v2i16__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -417,18 +417,18 @@ define void @v_shuffle_v2i16_v2i16__0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v2i16__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v2i16__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> zeroinitializer store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -460,17 +460,17 @@ define void @v_shuffle_v2i16_v2i16__1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v2i16__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v2i16__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v1, 16 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -502,17 +502,17 @@ define void @v_shuffle_v2i16_v2i16__2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v2i16__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v2i16__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -542,16 +542,16 @@ define void @v_shuffle_v2i16_v2i16__u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v2i16__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v2i16__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -581,16 +581,16 @@ define void @v_shuffle_v2i16_v2i16__0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v2i16__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v2i16__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -624,18 +624,18 @@ define void @v_shuffle_v2i16_v2i16__1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v2i16__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v2i16__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -665,16 +665,16 @@ define void @v_shuffle_v2i16_v2i16__2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v2i16__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v2i16__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -715,16 +715,16 @@ define void @v_shuffle_v2i16_v2i16__0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v2i16__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v2i16__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -756,17 +756,17 @@ define void @v_shuffle_v2i16_v2i16__1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v2i16__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v2i16__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v1, s0, v1, 16 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -807,16 +807,16 @@ define void @v_shuffle_v2i16_v2i16__u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v2i16__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v2i16__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> @@ -857,22 +857,22 @@ define void @v_shuffle_v2i16_v2i16__0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v2i16__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v2 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v2i16__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v1, v2 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> @@ -913,22 +913,22 @@ define void @v_shuffle_v2i16_v2i16__1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v2i16__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v1, s2 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v2i16__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v1, s2 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> @@ -959,16 +959,16 @@ define void @v_shuffle_v2i16_v2i16__2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v2i16__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v2i16__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> @@ -1013,17 +1013,17 @@ define void @s_shuffle_v2i16_v2i16__0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v2i16__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v2i16__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -1055,17 +1055,17 @@ define void @s_shuffle_v2i16_v2i16__1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v2i16__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v2i16__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -1111,17 +1111,17 @@ define void @s_shuffle_v2i16_v2i16__3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v2i16__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v2i16__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> @@ -1162,21 +1162,21 @@ define void @s_shuffle_v2i16_v2i16__3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v2i16__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v2i16__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> @@ -1215,20 +1215,20 @@ define void @s_shuffle_v2i16_v2i16__3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v2i16__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v2i16__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> @@ -1263,18 +1263,18 @@ define void @s_shuffle_v2i16_v2i16__3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v2i16__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v2i16__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> @@ -1307,17 +1307,17 @@ define void @s_shuffle_v2i16_v2i16__3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v2i16__3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v2i16__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> @@ -1350,17 +1350,17 @@ define void @s_shuffle_v2i16_v2i16__u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v2i16__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v2i16__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -1392,17 +1392,17 @@ define void @s_shuffle_v2i16_v2i16__0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v2i16__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v2i16__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> zeroinitializer call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -1436,18 +1436,18 @@ define void @s_shuffle_v2i16_v2i16__1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v2i16__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v2i16__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -1479,17 +1479,17 @@ define void @s_shuffle_v2i16_v2i16__2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v2i16__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v2i16__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -1519,17 +1519,17 @@ define void @s_shuffle_v2i16_v2i16__u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v2i16__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v2i16__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -1559,17 +1559,17 @@ define void @s_shuffle_v2i16_v2i16__0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v2i16__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v2i16__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -1601,17 +1601,17 @@ define void @s_shuffle_v2i16_v2i16__1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v2i16__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v2i16__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -1641,17 +1641,17 @@ define void @s_shuffle_v2i16_v2i16__2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v2i16__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v2i16__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -1695,17 +1695,17 @@ define void @s_shuffle_v2i16_v2i16__0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v2i16__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v2i16__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -1737,17 +1737,17 @@ define void @s_shuffle_v2i16_v2i16__1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v2i16__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v2i16__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -1791,17 +1791,17 @@ define void @s_shuffle_v2i16_v2i16__u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v2i16__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v2i16__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> @@ -1840,20 +1840,20 @@ define void @s_shuffle_v2i16_v2i16__0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v2i16__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v2i16__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> @@ -1892,20 +1892,20 @@ define void @s_shuffle_v2i16_v2i16__1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v2i16__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v2i16__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> @@ -1936,17 +1936,17 @@ define void @s_shuffle_v2i16_v2i16__2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v2i16__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v2i16__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i16.v3i16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i16.v3i16.ll index 36a9d54aca8a8..ca29ff1b3792a 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i16.v3i16.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i16.v3i16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v2i16_v3i16__u_u(ptr addrspace(1) inreg %ptr) { @@ -39,16 +39,16 @@ define void @v_shuffle_v2i16_v3i16__0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -81,17 +81,17 @@ define void @v_shuffle_v2i16_v3i16__1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -122,16 +122,16 @@ define void @v_shuffle_v2i16_v3i16__2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -176,17 +176,17 @@ define void @v_shuffle_v2i16_v3i16__4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -219,16 +219,16 @@ define void @v_shuffle_v2i16_v3i16__5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -271,22 +271,22 @@ define void @v_shuffle_v2i16_v3i16__5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -329,22 +329,22 @@ define void @v_shuffle_v2i16_v3i16__5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -387,22 +387,22 @@ define void @v_shuffle_v2i16_v3i16__5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -439,18 +439,18 @@ define void @v_shuffle_v2i16_v3i16__5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -487,18 +487,18 @@ define void @v_shuffle_v2i16_v3i16__5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v0 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -535,18 +535,18 @@ define void @v_shuffle_v2i16_v3i16__5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -581,17 +581,17 @@ define void @v_shuffle_v2i16_v3i16__u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -626,18 +626,18 @@ define void @v_shuffle_v2i16_v3i16__0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> zeroinitializer @@ -670,17 +670,17 @@ define void @v_shuffle_v2i16_v3i16__1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -715,18 +715,18 @@ define void @v_shuffle_v2i16_v3i16__2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -759,17 +759,17 @@ define void @v_shuffle_v2i16_v3i16__3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -808,21 +808,21 @@ define void @v_shuffle_v2i16_v3i16__4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -855,16 +855,16 @@ define void @v_shuffle_v2i16_v3i16__u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -895,16 +895,16 @@ define void @v_shuffle_v2i16_v3i16__0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -939,18 +939,18 @@ define void @v_shuffle_v2i16_v3i16__1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -985,18 +985,18 @@ define void @v_shuffle_v2i16_v3i16__2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v0 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -1027,16 +1027,16 @@ define void @v_shuffle_v2i16_v3i16__3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -1077,22 +1077,22 @@ define void @v_shuffle_v2i16_v3i16__4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -1127,17 +1127,17 @@ define void @v_shuffle_v2i16_v3i16__u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -1172,18 +1172,18 @@ define void @v_shuffle_v2i16_v3i16__0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -1216,17 +1216,17 @@ define void @v_shuffle_v2i16_v3i16__1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -1261,18 +1261,18 @@ define void @v_shuffle_v2i16_v3i16__2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -1305,17 +1305,17 @@ define void @v_shuffle_v2i16_v3i16__3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -1354,21 +1354,21 @@ define void @v_shuffle_v2i16_v3i16__4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -1413,16 +1413,16 @@ define void @v_shuffle_v2i16_v3i16__0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -1455,17 +1455,17 @@ define void @v_shuffle_v2i16_v3i16__1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -1496,16 +1496,16 @@ define void @v_shuffle_v2i16_v3i16__2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -1550,17 +1550,17 @@ define void @v_shuffle_v2i16_v3i16__4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -1593,16 +1593,16 @@ define void @v_shuffle_v2i16_v3i16__u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -1645,22 +1645,22 @@ define void @v_shuffle_v2i16_v3i16__0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -1703,22 +1703,22 @@ define void @v_shuffle_v2i16_v3i16__1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -1761,22 +1761,22 @@ define void @v_shuffle_v2i16_v3i16__2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -1809,16 +1809,16 @@ define void @v_shuffle_v2i16_v3i16__3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -1855,18 +1855,18 @@ define void @v_shuffle_v2i16_v3i16__4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -1901,17 +1901,17 @@ define void @v_shuffle_v2i16_v3i16__u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -1954,22 +1954,22 @@ define void @v_shuffle_v2i16_v3i16__0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -2010,21 +2010,21 @@ define void @v_shuffle_v2i16_v3i16__1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -2067,22 +2067,22 @@ define void @v_shuffle_v2i16_v3i16__2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -2119,18 +2119,18 @@ define void @v_shuffle_v2i16_v3i16__3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -2165,17 +2165,17 @@ define void @v_shuffle_v2i16_v3i16__4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v3i16__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v3i16__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -2223,17 +2223,17 @@ define void @s_shuffle_v2i16_v3i16__0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -2266,17 +2266,17 @@ define void @s_shuffle_v2i16_v3i16__1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -2309,17 +2309,17 @@ define void @s_shuffle_v2i16_v3i16__2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -2367,17 +2367,17 @@ define void @s_shuffle_v2i16_v3i16__4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -2412,17 +2412,17 @@ define void @s_shuffle_v2i16_v3i16__5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -2463,20 +2463,20 @@ define void @s_shuffle_v2i16_v3i16__5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -2517,20 +2517,20 @@ define void @s_shuffle_v2i16_v3i16__5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -2571,20 +2571,20 @@ define void @s_shuffle_v2i16_v3i16__5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -2619,17 +2619,17 @@ define void @s_shuffle_v2i16_v3i16__5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -2664,17 +2664,17 @@ define void @s_shuffle_v2i16_v3i16__5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -2709,17 +2709,17 @@ define void @s_shuffle_v2i16_v3i16__5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -2754,17 +2754,17 @@ define void @s_shuffle_v2i16_v3i16__u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -2797,17 +2797,17 @@ define void @s_shuffle_v2i16_v3i16__0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> zeroinitializer @@ -2842,18 +2842,18 @@ define void @s_shuffle_v2i16_v3i16__1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -2886,17 +2886,17 @@ define void @s_shuffle_v2i16_v3i16__2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -2929,17 +2929,17 @@ define void @s_shuffle_v2i16_v3i16__3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -2980,21 +2980,21 @@ define void @s_shuffle_v2i16_v3i16__4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -3027,17 +3027,17 @@ define void @s_shuffle_v2i16_v3i16__u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -3068,17 +3068,17 @@ define void @s_shuffle_v2i16_v3i16__0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -3111,17 +3111,17 @@ define void @s_shuffle_v2i16_v3i16__1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -3154,17 +3154,17 @@ define void @s_shuffle_v2i16_v3i16__2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -3195,17 +3195,17 @@ define void @s_shuffle_v2i16_v3i16__3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -3244,20 +3244,20 @@ define void @s_shuffle_v2i16_v3i16__4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s2, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -3292,17 +3292,17 @@ define void @s_shuffle_v2i16_v3i16__u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -3335,17 +3335,17 @@ define void @s_shuffle_v2i16_v3i16__0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -3380,18 +3380,18 @@ define void @s_shuffle_v2i16_v3i16__1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -3424,17 +3424,17 @@ define void @s_shuffle_v2i16_v3i16__2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -3467,17 +3467,17 @@ define void @s_shuffle_v2i16_v3i16__3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -3518,21 +3518,21 @@ define void @s_shuffle_v2i16_v3i16__4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -3580,17 +3580,17 @@ define void @s_shuffle_v2i16_v3i16__0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -3623,17 +3623,17 @@ define void @s_shuffle_v2i16_v3i16__1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -3666,17 +3666,17 @@ define void @s_shuffle_v2i16_v3i16__2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <2 x i32> @@ -3726,18 +3726,18 @@ define void @s_shuffle_v2i16_v3i16__4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -3770,17 +3770,17 @@ define void @s_shuffle_v2i16_v3i16__u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -3821,20 +3821,20 @@ define void @s_shuffle_v2i16_v3i16__0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -3875,20 +3875,20 @@ define void @s_shuffle_v2i16_v3i16__1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -3929,20 +3929,20 @@ define void @s_shuffle_v2i16_v3i16__2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s1, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -3975,17 +3975,17 @@ define void @s_shuffle_v2i16_v3i16__3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -4020,17 +4020,17 @@ define void @s_shuffle_v2i16_v3i16__4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -4065,17 +4065,17 @@ define void @s_shuffle_v2i16_v3i16__u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -4116,20 +4116,20 @@ define void @s_shuffle_v2i16_v3i16__0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -4172,21 +4172,21 @@ define void @s_shuffle_v2i16_v3i16__1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -4227,20 +4227,20 @@ define void @s_shuffle_v2i16_v3i16__2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -4275,17 +4275,17 @@ define void @s_shuffle_v2i16_v3i16__3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -4322,18 +4322,18 @@ define void @s_shuffle_v2i16_v3i16__4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v3i16__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v3i16__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i16.v4i16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i16.v4i16.ll index 6cdb42cc613fb..a15919daf07eb 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i16.v4i16.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i16.v4i16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v2i16_v4i16__u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v2i16_v4i16__0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -79,17 +79,17 @@ define void @v_shuffle_v2i16_v4i16__1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -119,16 +119,16 @@ define void @v_shuffle_v2i16_v4i16__2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -160,17 +160,17 @@ define void @v_shuffle_v2i16_v4i16__3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -213,17 +213,17 @@ define void @v_shuffle_v2i16_v4i16__5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -254,16 +254,16 @@ define void @v_shuffle_v2i16_v4i16__6_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -296,17 +296,17 @@ define void @v_shuffle_v2i16_v4i16__7_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -345,21 +345,21 @@ define void @v_shuffle_v2i16_v4i16__7_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -400,22 +400,22 @@ define void @v_shuffle_v2i16_v4i16__7_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -454,21 +454,21 @@ define void @v_shuffle_v2i16_v4i16__7_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -509,22 +509,22 @@ define void @v_shuffle_v2i16_v4i16__7_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -557,17 +557,17 @@ define void @v_shuffle_v2i16_v4i16__7_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v1, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -602,18 +602,18 @@ define void @v_shuffle_v2i16_v4i16__7_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -646,17 +646,17 @@ define void @v_shuffle_v2i16_v4i16__7_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v1, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -691,18 +691,18 @@ define void @v_shuffle_v2i16_v4i16__7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -735,17 +735,17 @@ define void @v_shuffle_v2i16_v4i16__u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -779,18 +779,18 @@ define void @v_shuffle_v2i16_v4i16__0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> zeroinitializer store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -822,17 +822,17 @@ define void @v_shuffle_v2i16_v4i16__1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -866,18 +866,18 @@ define void @v_shuffle_v2i16_v4i16__2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -909,17 +909,17 @@ define void @v_shuffle_v2i16_v4i16__3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v1, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -951,17 +951,17 @@ define void @v_shuffle_v2i16_v4i16__4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -999,21 +999,21 @@ define void @v_shuffle_v2i16_v4i16__5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -1054,22 +1054,22 @@ define void @v_shuffle_v2i16_v4i16__6_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -1100,16 +1100,16 @@ define void @v_shuffle_v2i16_v4i16__u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1139,16 +1139,16 @@ define void @v_shuffle_v2i16_v4i16__0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1182,18 +1182,18 @@ define void @v_shuffle_v2i16_v4i16__1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1227,18 +1227,18 @@ define void @v_shuffle_v2i16_v4i16__2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v0 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1272,18 +1272,18 @@ define void @v_shuffle_v2i16_v4i16__3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1313,16 +1313,16 @@ define void @v_shuffle_v2i16_v4i16__4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1362,22 +1362,22 @@ define void @v_shuffle_v2i16_v4i16__5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -1418,22 +1418,22 @@ define void @v_shuffle_v2i16_v4i16__6_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -1466,17 +1466,17 @@ define void @v_shuffle_v2i16_v4i16__u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1510,18 +1510,18 @@ define void @v_shuffle_v2i16_v4i16__0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1553,17 +1553,17 @@ define void @v_shuffle_v2i16_v4i16__1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1597,18 +1597,18 @@ define void @v_shuffle_v2i16_v4i16__2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1640,17 +1640,17 @@ define void @v_shuffle_v2i16_v4i16__3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v1, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1682,17 +1682,17 @@ define void @v_shuffle_v2i16_v4i16__4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1730,21 +1730,21 @@ define void @v_shuffle_v2i16_v4i16__5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -1785,22 +1785,22 @@ define void @v_shuffle_v2i16_v4i16__6_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -1831,16 +1831,16 @@ define void @v_shuffle_v2i16_v4i16__u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1874,18 +1874,18 @@ define void @v_shuffle_v2i16_v4i16__0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1919,18 +1919,18 @@ define void @v_shuffle_v2i16_v4i16__1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1960,16 +1960,16 @@ define void @v_shuffle_v2i16_v4i16__2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2003,18 +2003,18 @@ define void @v_shuffle_v2i16_v4i16__3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2044,16 +2044,16 @@ define void @v_shuffle_v2i16_v4i16__4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2093,22 +2093,22 @@ define void @v_shuffle_v2i16_v4i16__5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -2149,22 +2149,22 @@ define void @v_shuffle_v2i16_v4i16__6_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v1 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -2206,16 +2206,16 @@ define void @v_shuffle_v2i16_v4i16__0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2247,17 +2247,17 @@ define void @v_shuffle_v2i16_v4i16__1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2287,16 +2287,16 @@ define void @v_shuffle_v2i16_v4i16__2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2328,17 +2328,17 @@ define void @v_shuffle_v2i16_v4i16__3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2381,17 +2381,17 @@ define void @v_shuffle_v2i16_v4i16__5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -2426,18 +2426,18 @@ define void @v_shuffle_v2i16_v4i16__6_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -2468,16 +2468,16 @@ define void @v_shuffle_v2i16_v4i16__u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -2518,22 +2518,22 @@ define void @v_shuffle_v2i16_v4i16__0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -2574,22 +2574,22 @@ define void @v_shuffle_v2i16_v4i16__1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -2630,22 +2630,22 @@ define void @v_shuffle_v2i16_v4i16__2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -2686,22 +2686,22 @@ define void @v_shuffle_v2i16_v4i16__3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -2732,16 +2732,16 @@ define void @v_shuffle_v2i16_v4i16__4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -2776,18 +2776,18 @@ define void @v_shuffle_v2i16_v4i16__5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -2822,18 +2822,18 @@ define void @v_shuffle_v2i16_v4i16__6_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v0 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -2866,17 +2866,17 @@ define void @v_shuffle_v2i16_v4i16__u_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -2917,22 +2917,22 @@ define void @v_shuffle_v2i16_v4i16__0_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -2971,21 +2971,21 @@ define void @v_shuffle_v2i16_v4i16__1_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -3026,22 +3026,22 @@ define void @v_shuffle_v2i16_v4i16__2_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -3080,21 +3080,21 @@ define void @v_shuffle_v2i16_v4i16__3_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v1, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v1, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -3129,18 +3129,18 @@ define void @v_shuffle_v2i16_v4i16__4_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -3173,17 +3173,17 @@ define void @v_shuffle_v2i16_v4i16__5_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -3218,18 +3218,18 @@ define void @v_shuffle_v2i16_v4i16__6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -3260,16 +3260,16 @@ define void @v_shuffle_v2i16_v4i16__u_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -3310,22 +3310,22 @@ define void @v_shuffle_v2i16_v4i16__0_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -3366,22 +3366,22 @@ define void @v_shuffle_v2i16_v4i16__1_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -3422,22 +3422,22 @@ define void @v_shuffle_v2i16_v4i16__2_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -3478,22 +3478,22 @@ define void @v_shuffle_v2i16_v4i16__3_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -3528,18 +3528,18 @@ define void @v_shuffle_v2i16_v4i16__4_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -3574,18 +3574,18 @@ define void @v_shuffle_v2i16_v4i16__5_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -3616,16 +3616,16 @@ define void @v_shuffle_v2i16_v4i16__6_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v4i16__6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v4i16__6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -3670,17 +3670,17 @@ define void @s_shuffle_v2i16_v4i16__0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -3712,17 +3712,17 @@ define void @s_shuffle_v2i16_v4i16__1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -3754,17 +3754,17 @@ define void @s_shuffle_v2i16_v4i16__2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -3796,17 +3796,17 @@ define void @s_shuffle_v2i16_v4i16__3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -3852,17 +3852,17 @@ define void @s_shuffle_v2i16_v4i16__5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -3895,17 +3895,17 @@ define void @s_shuffle_v2i16_v4i16__6_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -3938,17 +3938,17 @@ define void @s_shuffle_v2i16_v4i16__7_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -3989,21 +3989,21 @@ define void @s_shuffle_v2i16_v4i16__7_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -4042,20 +4042,20 @@ define void @s_shuffle_v2i16_v4i16__7_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -4096,21 +4096,21 @@ define void @s_shuffle_v2i16_v4i16__7_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -4149,20 +4149,20 @@ define void @s_shuffle_v2i16_v4i16__7_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -4197,18 +4197,18 @@ define void @s_shuffle_v2i16_v4i16__7_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -4241,17 +4241,17 @@ define void @s_shuffle_v2i16_v4i16__7_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -4286,18 +4286,18 @@ define void @s_shuffle_v2i16_v4i16__7_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -4330,17 +4330,17 @@ define void @s_shuffle_v2i16_v4i16__7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -4373,17 +4373,17 @@ define void @s_shuffle_v2i16_v4i16__u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -4415,17 +4415,17 @@ define void @s_shuffle_v2i16_v4i16__0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> zeroinitializer call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -4459,18 +4459,18 @@ define void @s_shuffle_v2i16_v4i16__1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -4502,17 +4502,17 @@ define void @s_shuffle_v2i16_v4i16__2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -4546,18 +4546,18 @@ define void @s_shuffle_v2i16_v4i16__3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -4589,17 +4589,17 @@ define void @s_shuffle_v2i16_v4i16__4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -4639,21 +4639,21 @@ define void @s_shuffle_v2i16_v4i16__5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -4692,20 +4692,20 @@ define void @s_shuffle_v2i16_v4i16__6_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -4736,17 +4736,17 @@ define void @s_shuffle_v2i16_v4i16__u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -4776,17 +4776,17 @@ define void @s_shuffle_v2i16_v4i16__0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -4818,17 +4818,17 @@ define void @s_shuffle_v2i16_v4i16__1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -4860,17 +4860,17 @@ define void @s_shuffle_v2i16_v4i16__2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -4902,17 +4902,17 @@ define void @s_shuffle_v2i16_v4i16__3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -4942,17 +4942,17 @@ define void @s_shuffle_v2i16_v4i16__4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -4990,20 +4990,20 @@ define void @s_shuffle_v2i16_v4i16__5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s2, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -5042,20 +5042,20 @@ define void @s_shuffle_v2i16_v4i16__6_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -5088,17 +5088,17 @@ define void @s_shuffle_v2i16_v4i16__u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -5130,17 +5130,17 @@ define void @s_shuffle_v2i16_v4i16__0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -5174,18 +5174,18 @@ define void @s_shuffle_v2i16_v4i16__1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -5217,17 +5217,17 @@ define void @s_shuffle_v2i16_v4i16__2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -5261,18 +5261,18 @@ define void @s_shuffle_v2i16_v4i16__3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -5304,17 +5304,17 @@ define void @s_shuffle_v2i16_v4i16__4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -5354,21 +5354,21 @@ define void @s_shuffle_v2i16_v4i16__5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -5407,20 +5407,20 @@ define void @s_shuffle_v2i16_v4i16__6_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -5453,17 +5453,17 @@ define void @s_shuffle_v2i16_v4i16__u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -5495,17 +5495,17 @@ define void @s_shuffle_v2i16_v4i16__0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -5537,17 +5537,17 @@ define void @s_shuffle_v2i16_v4i16__1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -5579,17 +5579,17 @@ define void @s_shuffle_v2i16_v4i16__2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -5621,17 +5621,17 @@ define void @s_shuffle_v2i16_v4i16__3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -5663,17 +5663,17 @@ define void @s_shuffle_v2i16_v4i16__4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -5711,20 +5711,20 @@ define void @s_shuffle_v2i16_v4i16__5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s2, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -5763,20 +5763,20 @@ define void @s_shuffle_v2i16_v4i16__6_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s3, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -5821,17 +5821,17 @@ define void @s_shuffle_v2i16_v4i16__0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -5863,17 +5863,17 @@ define void @s_shuffle_v2i16_v4i16__1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -5905,17 +5905,17 @@ define void @s_shuffle_v2i16_v4i16__2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -5947,17 +5947,17 @@ define void @s_shuffle_v2i16_v4i16__3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -6005,18 +6005,18 @@ define void @s_shuffle_v2i16_v4i16__5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -6049,17 +6049,17 @@ define void @s_shuffle_v2i16_v4i16__6_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -6090,17 +6090,17 @@ define void @s_shuffle_v2i16_v4i16__u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -6139,20 +6139,20 @@ define void @s_shuffle_v2i16_v4i16__0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -6191,20 +6191,20 @@ define void @s_shuffle_v2i16_v4i16__1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -6243,20 +6243,20 @@ define void @s_shuffle_v2i16_v4i16__2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s1, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -6295,20 +6295,20 @@ define void @s_shuffle_v2i16_v4i16__3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -6339,17 +6339,17 @@ define void @s_shuffle_v2i16_v4i16__4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -6382,17 +6382,17 @@ define void @s_shuffle_v2i16_v4i16__5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -6425,17 +6425,17 @@ define void @s_shuffle_v2i16_v4i16__6_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -6468,17 +6468,17 @@ define void @s_shuffle_v2i16_v4i16__u_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -6517,20 +6517,20 @@ define void @s_shuffle_v2i16_v4i16__0_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -6571,21 +6571,21 @@ define void @s_shuffle_v2i16_v4i16__1_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -6624,20 +6624,20 @@ define void @s_shuffle_v2i16_v4i16__2_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -6678,21 +6678,21 @@ define void @s_shuffle_v2i16_v4i16__3_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -6725,17 +6725,17 @@ define void @s_shuffle_v2i16_v4i16__4_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -6770,18 +6770,18 @@ define void @s_shuffle_v2i16_v4i16__5_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -6814,17 +6814,17 @@ define void @s_shuffle_v2i16_v4i16__6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -6857,17 +6857,17 @@ define void @s_shuffle_v2i16_v4i16__u_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -6906,20 +6906,20 @@ define void @s_shuffle_v2i16_v4i16__0_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -6958,20 +6958,20 @@ define void @s_shuffle_v2i16_v4i16__1_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -7010,20 +7010,20 @@ define void @s_shuffle_v2i16_v4i16__2_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s1, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -7062,20 +7062,20 @@ define void @s_shuffle_v2i16_v4i16__3_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -7108,17 +7108,17 @@ define void @s_shuffle_v2i16_v4i16__4_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -7151,17 +7151,17 @@ define void @s_shuffle_v2i16_v4i16__5_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> @@ -7194,17 +7194,17 @@ define void @s_shuffle_v2i16_v4i16__6_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v4i16__6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v4i16__6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <2 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i16.v8i16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i16.v8i16.ll index 340854a745fe8..40eab86f27b5a 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i16.v8i16.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i16.v8i16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v2i16_v8i16__u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v2i16_v8i16__0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -79,17 +79,17 @@ define void @v_shuffle_v2i16_v8i16__1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -119,16 +119,16 @@ define void @v_shuffle_v2i16_v8i16__2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -160,17 +160,17 @@ define void @v_shuffle_v2i16_v8i16__3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -200,16 +200,16 @@ define void @v_shuffle_v2i16_v8i16__4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -241,17 +241,17 @@ define void @v_shuffle_v2i16_v8i16__5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -281,16 +281,16 @@ define void @v_shuffle_v2i16_v8i16__6_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v3, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -322,17 +322,17 @@ define void @v_shuffle_v2i16_v8i16__7_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -375,17 +375,17 @@ define void @v_shuffle_v2i16_v8i16__9_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__9_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__9_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -416,16 +416,16 @@ define void @v_shuffle_v2i16_v8i16__10_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__10_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__10_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -458,17 +458,17 @@ define void @v_shuffle_v2i16_v8i16__11_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__11_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__11_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -499,16 +499,16 @@ define void @v_shuffle_v2i16_v8i16__12_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__12_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__12_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -541,17 +541,17 @@ define void @v_shuffle_v2i16_v8i16__13_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__13_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__13_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -582,16 +582,16 @@ define void @v_shuffle_v2i16_v8i16__14_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__14_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__14_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v3, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -624,17 +624,17 @@ define void @v_shuffle_v2i16_v8i16__15_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__15_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__15_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -673,21 +673,21 @@ define void @v_shuffle_v2i16_v8i16__15_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__15_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v5, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__15_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v5, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -728,22 +728,22 @@ define void @v_shuffle_v2i16_v8i16__15_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__15_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v5, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__15_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v5, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -782,21 +782,21 @@ define void @v_shuffle_v2i16_v8i16__15_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__15_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v5, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__15_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v5, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -837,22 +837,22 @@ define void @v_shuffle_v2i16_v8i16__15_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__15_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v5, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__15_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v5, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -891,21 +891,21 @@ define void @v_shuffle_v2i16_v8i16__15_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__15_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v7, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__15_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v7, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -946,22 +946,22 @@ define void @v_shuffle_v2i16_v8i16__15_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__15_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v7, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__15_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v7, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -1000,21 +1000,21 @@ define void @v_shuffle_v2i16_v8i16__15_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__15_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v7, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__15_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v7, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -1055,22 +1055,22 @@ define void @v_shuffle_v2i16_v8i16__15_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__15_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v7, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__15_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v7, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -1103,17 +1103,17 @@ define void @v_shuffle_v2i16_v8i16__15_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__15_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__15_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -1148,18 +1148,18 @@ define void @v_shuffle_v2i16_v8i16__15_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__15_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__15_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -1192,17 +1192,17 @@ define void @v_shuffle_v2i16_v8i16__15_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__15_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__15_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -1237,18 +1237,18 @@ define void @v_shuffle_v2i16_v8i16__15_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__15_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__15_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -1281,17 +1281,17 @@ define void @v_shuffle_v2i16_v8i16__15_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__15_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__15_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -1326,18 +1326,18 @@ define void @v_shuffle_v2i16_v8i16__15_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__15_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__15_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -1370,17 +1370,17 @@ define void @v_shuffle_v2i16_v8i16__15_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__15_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__15_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -1415,18 +1415,18 @@ define void @v_shuffle_v2i16_v8i16__15_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__15_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__15_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -1459,17 +1459,17 @@ define void @v_shuffle_v2i16_v8i16__u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1503,18 +1503,18 @@ define void @v_shuffle_v2i16_v8i16__0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> zeroinitializer store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1546,17 +1546,17 @@ define void @v_shuffle_v2i16_v8i16__1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1590,18 +1590,18 @@ define void @v_shuffle_v2i16_v8i16__2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1633,17 +1633,17 @@ define void @v_shuffle_v2i16_v8i16__3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v1, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1677,18 +1677,18 @@ define void @v_shuffle_v2i16_v8i16__4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1720,17 +1720,17 @@ define void @v_shuffle_v2i16_v8i16__5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1764,18 +1764,18 @@ define void @v_shuffle_v2i16_v8i16__6_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1807,17 +1807,17 @@ define void @v_shuffle_v2i16_v8i16__7_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1849,17 +1849,17 @@ define void @v_shuffle_v2i16_v8i16__8_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__8_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__8_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -1897,21 +1897,21 @@ define void @v_shuffle_v2i16_v8i16__9_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__9_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v2, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__9_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v2, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -1952,22 +1952,22 @@ define void @v_shuffle_v2i16_v8i16__10_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__10_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__10_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -2006,21 +2006,21 @@ define void @v_shuffle_v2i16_v8i16__11_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__11_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__11_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -2061,22 +2061,22 @@ define void @v_shuffle_v2i16_v8i16__12_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__12_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v4, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__12_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v4, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -2115,21 +2115,21 @@ define void @v_shuffle_v2i16_v8i16__13_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__13_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v4, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__13_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v4, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -2170,22 +2170,22 @@ define void @v_shuffle_v2i16_v8i16__14_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__14_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v5, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__14_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v5, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -2216,16 +2216,16 @@ define void @v_shuffle_v2i16_v8i16__u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2255,16 +2255,16 @@ define void @v_shuffle_v2i16_v8i16__0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2298,18 +2298,18 @@ define void @v_shuffle_v2i16_v8i16__1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2343,18 +2343,18 @@ define void @v_shuffle_v2i16_v8i16__2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v0 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2388,18 +2388,18 @@ define void @v_shuffle_v2i16_v8i16__3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2433,18 +2433,18 @@ define void @v_shuffle_v2i16_v8i16__4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v0 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v2, v0 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2478,18 +2478,18 @@ define void @v_shuffle_v2i16_v8i16__5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2523,18 +2523,18 @@ define void @v_shuffle_v2i16_v8i16__6_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2568,18 +2568,18 @@ define void @v_shuffle_v2i16_v8i16__7_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2609,16 +2609,16 @@ define void @v_shuffle_v2i16_v8i16__8_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__8_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__8_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -2658,22 +2658,22 @@ define void @v_shuffle_v2i16_v8i16__9_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__9_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v2, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__9_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v2, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -2714,22 +2714,22 @@ define void @v_shuffle_v2i16_v8i16__10_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__10_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__10_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -2770,22 +2770,22 @@ define void @v_shuffle_v2i16_v8i16__11_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__11_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__11_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -2826,22 +2826,22 @@ define void @v_shuffle_v2i16_v8i16__12_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__12_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v4, v0 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__12_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v4, v0 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -2882,22 +2882,22 @@ define void @v_shuffle_v2i16_v8i16__13_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__13_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v4, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__13_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v4, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -2938,22 +2938,22 @@ define void @v_shuffle_v2i16_v8i16__14_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__14_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v5, v0 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__14_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v5, v0 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -2986,17 +2986,17 @@ define void @v_shuffle_v2i16_v8i16__u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3030,18 +3030,18 @@ define void @v_shuffle_v2i16_v8i16__0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3073,17 +3073,17 @@ define void @v_shuffle_v2i16_v8i16__1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3117,18 +3117,18 @@ define void @v_shuffle_v2i16_v8i16__2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3160,17 +3160,17 @@ define void @v_shuffle_v2i16_v8i16__3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v1, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3204,18 +3204,18 @@ define void @v_shuffle_v2i16_v8i16__4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3247,17 +3247,17 @@ define void @v_shuffle_v2i16_v8i16__5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3291,18 +3291,18 @@ define void @v_shuffle_v2i16_v8i16__6_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3334,17 +3334,17 @@ define void @v_shuffle_v2i16_v8i16__7_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3376,17 +3376,17 @@ define void @v_shuffle_v2i16_v8i16__8_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__8_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__8_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3424,21 +3424,21 @@ define void @v_shuffle_v2i16_v8i16__9_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__9_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v2, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__9_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v2, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -3479,22 +3479,22 @@ define void @v_shuffle_v2i16_v8i16__10_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__10_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__10_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -3533,21 +3533,21 @@ define void @v_shuffle_v2i16_v8i16__11_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__11_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__11_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v3, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -3588,22 +3588,22 @@ define void @v_shuffle_v2i16_v8i16__12_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__12_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v4, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__12_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v4, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -3642,21 +3642,21 @@ define void @v_shuffle_v2i16_v8i16__13_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__13_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v4, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__13_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v4, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -3697,22 +3697,22 @@ define void @v_shuffle_v2i16_v8i16__14_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__14_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v5, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__14_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v5, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -3743,16 +3743,16 @@ define void @v_shuffle_v2i16_v8i16__u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3786,18 +3786,18 @@ define void @v_shuffle_v2i16_v8i16__0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v1 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3831,18 +3831,18 @@ define void @v_shuffle_v2i16_v8i16__1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3872,16 +3872,16 @@ define void @v_shuffle_v2i16_v8i16__2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3915,18 +3915,18 @@ define void @v_shuffle_v2i16_v8i16__3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -3960,18 +3960,18 @@ define void @v_shuffle_v2i16_v8i16__4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v2, v1 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4005,18 +4005,18 @@ define void @v_shuffle_v2i16_v8i16__5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4050,18 +4050,18 @@ define void @v_shuffle_v2i16_v8i16__6_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v1 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4095,18 +4095,18 @@ define void @v_shuffle_v2i16_v8i16__7_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4136,16 +4136,16 @@ define void @v_shuffle_v2i16_v8i16__8_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__8_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__8_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4185,22 +4185,22 @@ define void @v_shuffle_v2i16_v8i16__9_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__9_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v2, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__9_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v2, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -4241,22 +4241,22 @@ define void @v_shuffle_v2i16_v8i16__10_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__10_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v1 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__10_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v1 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -4297,22 +4297,22 @@ define void @v_shuffle_v2i16_v8i16__11_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__11_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__11_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -4353,22 +4353,22 @@ define void @v_shuffle_v2i16_v8i16__12_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__12_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v4, v1 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__12_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v4, v1 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -4409,22 +4409,22 @@ define void @v_shuffle_v2i16_v8i16__13_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__13_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v4, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__13_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v4, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -4465,22 +4465,22 @@ define void @v_shuffle_v2i16_v8i16__14_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__14_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v5, v1 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__14_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v5, v1 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -4513,17 +4513,17 @@ define void @v_shuffle_v2i16_v8i16__u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4557,18 +4557,18 @@ define void @v_shuffle_v2i16_v8i16__0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4600,17 +4600,17 @@ define void @v_shuffle_v2i16_v8i16__1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4644,18 +4644,18 @@ define void @v_shuffle_v2i16_v8i16__2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4687,17 +4687,17 @@ define void @v_shuffle_v2i16_v8i16__3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v1, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v1, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4731,18 +4731,18 @@ define void @v_shuffle_v2i16_v8i16__4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4774,17 +4774,17 @@ define void @v_shuffle_v2i16_v8i16__5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4818,18 +4818,18 @@ define void @v_shuffle_v2i16_v8i16__6_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4861,17 +4861,17 @@ define void @v_shuffle_v2i16_v8i16__7_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4903,17 +4903,17 @@ define void @v_shuffle_v2i16_v8i16__8_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__8_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__8_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -4951,21 +4951,21 @@ define void @v_shuffle_v2i16_v8i16__9_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__9_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v4, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__9_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v4, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -5006,22 +5006,22 @@ define void @v_shuffle_v2i16_v8i16__10_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__10_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v5, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__10_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v5, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -5060,21 +5060,21 @@ define void @v_shuffle_v2i16_v8i16__11_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__11_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v5, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__11_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v5, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -5115,22 +5115,22 @@ define void @v_shuffle_v2i16_v8i16__12_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__12_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v6, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__12_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v6, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -5169,21 +5169,21 @@ define void @v_shuffle_v2i16_v8i16__13_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__13_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v6, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__13_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v6, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -5224,22 +5224,22 @@ define void @v_shuffle_v2i16_v8i16__14_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__14_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v7, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__14_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v7, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -5270,16 +5270,16 @@ define void @v_shuffle_v2i16_v8i16__u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -5313,18 +5313,18 @@ define void @v_shuffle_v2i16_v8i16__0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -5358,18 +5358,18 @@ define void @v_shuffle_v2i16_v8i16__1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -5403,18 +5403,18 @@ define void @v_shuffle_v2i16_v8i16__2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -5448,18 +5448,18 @@ define void @v_shuffle_v2i16_v8i16__3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -5489,16 +5489,16 @@ define void @v_shuffle_v2i16_v8i16__4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -5532,18 +5532,18 @@ define void @v_shuffle_v2i16_v8i16__5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -5577,18 +5577,18 @@ define void @v_shuffle_v2i16_v8i16__6_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -5622,18 +5622,18 @@ define void @v_shuffle_v2i16_v8i16__7_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -5663,16 +5663,16 @@ define void @v_shuffle_v2i16_v8i16__8_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__8_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__8_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -5712,22 +5712,22 @@ define void @v_shuffle_v2i16_v8i16__9_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__9_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v4, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__9_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v4, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -5768,22 +5768,22 @@ define void @v_shuffle_v2i16_v8i16__10_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__10_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v5, v2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__10_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v5, v2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -5824,22 +5824,22 @@ define void @v_shuffle_v2i16_v8i16__11_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__11_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v5, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__11_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v5, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -5880,22 +5880,22 @@ define void @v_shuffle_v2i16_v8i16__12_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__12_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v6, v2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__12_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v6, v2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -5936,22 +5936,22 @@ define void @v_shuffle_v2i16_v8i16__13_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__13_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v6, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__13_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v6, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -5992,22 +5992,22 @@ define void @v_shuffle_v2i16_v8i16__14_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__14_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v7, v2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__14_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v7, v2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -6040,17 +6040,17 @@ define void @v_shuffle_v2i16_v8i16__u_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6084,18 +6084,18 @@ define void @v_shuffle_v2i16_v8i16__0_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6127,17 +6127,17 @@ define void @v_shuffle_v2i16_v8i16__1_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6171,18 +6171,18 @@ define void @v_shuffle_v2i16_v8i16__2_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6214,17 +6214,17 @@ define void @v_shuffle_v2i16_v8i16__3_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v1, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v1, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6258,18 +6258,18 @@ define void @v_shuffle_v2i16_v8i16__4_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6301,17 +6301,17 @@ define void @v_shuffle_v2i16_v8i16__5_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6345,18 +6345,18 @@ define void @v_shuffle_v2i16_v8i16__6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6388,17 +6388,17 @@ define void @v_shuffle_v2i16_v8i16__7_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6430,17 +6430,17 @@ define void @v_shuffle_v2i16_v8i16__8_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__8_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__8_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6478,21 +6478,21 @@ define void @v_shuffle_v2i16_v8i16__9_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__9_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v4, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__9_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v4, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -6533,22 +6533,22 @@ define void @v_shuffle_v2i16_v8i16__10_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__10_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v5, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__10_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v5, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -6587,21 +6587,21 @@ define void @v_shuffle_v2i16_v8i16__11_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__11_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v5, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__11_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v5, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -6642,22 +6642,22 @@ define void @v_shuffle_v2i16_v8i16__12_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__12_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v6, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__12_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v6, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -6696,21 +6696,21 @@ define void @v_shuffle_v2i16_v8i16__13_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__13_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v6, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__13_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v6, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -6751,22 +6751,22 @@ define void @v_shuffle_v2i16_v8i16__14_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__14_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v7, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__14_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v7, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -6797,16 +6797,16 @@ define void @v_shuffle_v2i16_v8i16__u_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v3, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6840,18 +6840,18 @@ define void @v_shuffle_v2i16_v8i16__0_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6885,18 +6885,18 @@ define void @v_shuffle_v2i16_v8i16__1_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6930,18 +6930,18 @@ define void @v_shuffle_v2i16_v8i16__2_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -6975,18 +6975,18 @@ define void @v_shuffle_v2i16_v8i16__3_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7020,18 +7020,18 @@ define void @v_shuffle_v2i16_v8i16__4_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v2, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7065,18 +7065,18 @@ define void @v_shuffle_v2i16_v8i16__5_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7106,16 +7106,16 @@ define void @v_shuffle_v2i16_v8i16__6_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v3, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7149,18 +7149,18 @@ define void @v_shuffle_v2i16_v8i16__7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7190,16 +7190,16 @@ define void @v_shuffle_v2i16_v8i16__8_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__8_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__8_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v3, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7239,22 +7239,22 @@ define void @v_shuffle_v2i16_v8i16__9_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__9_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v4, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__9_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v4, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -7295,22 +7295,22 @@ define void @v_shuffle_v2i16_v8i16__10_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__10_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v5, v3 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__10_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v5, v3 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -7351,22 +7351,22 @@ define void @v_shuffle_v2i16_v8i16__11_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__11_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v5, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__11_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v5, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -7407,22 +7407,22 @@ define void @v_shuffle_v2i16_v8i16__12_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__12_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v6, v3 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__12_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v6, v3 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -7463,22 +7463,22 @@ define void @v_shuffle_v2i16_v8i16__13_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__13_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v6, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__13_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v6, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -7519,22 +7519,22 @@ define void @v_shuffle_v2i16_v8i16__14_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__14_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v7, v3 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__14_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v7, v3 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -7576,16 +7576,16 @@ define void @v_shuffle_v2i16_v8i16__0_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__0_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__0_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7617,17 +7617,17 @@ define void @v_shuffle_v2i16_v8i16__1_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__1_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__1_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7657,16 +7657,16 @@ define void @v_shuffle_v2i16_v8i16__2_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__2_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__2_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7698,17 +7698,17 @@ define void @v_shuffle_v2i16_v8i16__3_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__3_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__3_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7738,16 +7738,16 @@ define void @v_shuffle_v2i16_v8i16__4_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__4_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__4_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7779,17 +7779,17 @@ define void @v_shuffle_v2i16_v8i16__5_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__5_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__5_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7819,16 +7819,16 @@ define void @v_shuffle_v2i16_v8i16__6_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__6_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__6_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v3, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7860,17 +7860,17 @@ define void @v_shuffle_v2i16_v8i16__7_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__7_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__7_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 @@ -7913,17 +7913,17 @@ define void @v_shuffle_v2i16_v8i16__9_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__9_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__9_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -7958,18 +7958,18 @@ define void @v_shuffle_v2i16_v8i16__10_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__10_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__10_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -8002,17 +8002,17 @@ define void @v_shuffle_v2i16_v8i16__11_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__11_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__11_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v1, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -8047,18 +8047,18 @@ define void @v_shuffle_v2i16_v8i16__12_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__12_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__12_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -8091,17 +8091,17 @@ define void @v_shuffle_v2i16_v8i16__13_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__13_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__13_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -8136,18 +8136,18 @@ define void @v_shuffle_v2i16_v8i16__14_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__14_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__14_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -8178,16 +8178,16 @@ define void @v_shuffle_v2i16_v8i16__u_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__u_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__u_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -8228,22 +8228,22 @@ define void @v_shuffle_v2i16_v8i16__0_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__0_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__0_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -8284,22 +8284,22 @@ define void @v_shuffle_v2i16_v8i16__1_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__1_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__1_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v0, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -8340,22 +8340,22 @@ define void @v_shuffle_v2i16_v8i16__2_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__2_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__2_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -8396,22 +8396,22 @@ define void @v_shuffle_v2i16_v8i16__3_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__3_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v1, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__3_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v1, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -8452,22 +8452,22 @@ define void @v_shuffle_v2i16_v8i16__4_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__4_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v4 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__4_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v2, v4 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -8508,22 +8508,22 @@ define void @v_shuffle_v2i16_v8i16__5_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__5_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v4, v2, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__5_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v4, v2, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -8564,22 +8564,22 @@ define void @v_shuffle_v2i16_v8i16__6_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__6_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v4 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__6_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v4 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -8620,22 +8620,22 @@ define void @v_shuffle_v2i16_v8i16__7_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__7_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v4, v3, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__7_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v4, v3, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -8666,16 +8666,16 @@ define void @v_shuffle_v2i16_v8i16__8_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__8_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__8_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -8710,18 +8710,18 @@ define void @v_shuffle_v2i16_v8i16__9_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__9_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__9_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -8756,18 +8756,18 @@ define void @v_shuffle_v2i16_v8i16__10_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__10_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__10_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v0 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -8802,18 +8802,18 @@ define void @v_shuffle_v2i16_v8i16__11_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__11_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__11_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -8848,18 +8848,18 @@ define void @v_shuffle_v2i16_v8i16__12_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__12_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v0 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__12_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v2, v0 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -8894,18 +8894,18 @@ define void @v_shuffle_v2i16_v8i16__13_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__13_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__13_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -8940,18 +8940,18 @@ define void @v_shuffle_v2i16_v8i16__14_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__14_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__14_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -8984,17 +8984,17 @@ define void @v_shuffle_v2i16_v8i16__u_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__u_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__u_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -9035,22 +9035,22 @@ define void @v_shuffle_v2i16_v8i16__0_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__0_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__0_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -9089,21 +9089,21 @@ define void @v_shuffle_v2i16_v8i16__1_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__1_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__1_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v0, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -9144,22 +9144,22 @@ define void @v_shuffle_v2i16_v8i16__2_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__2_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__2_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -9198,21 +9198,21 @@ define void @v_shuffle_v2i16_v8i16__3_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__3_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v1, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__3_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v1, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -9253,22 +9253,22 @@ define void @v_shuffle_v2i16_v8i16__4_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__4_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v5, v2, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__4_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v5, v2, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -9307,21 +9307,21 @@ define void @v_shuffle_v2i16_v8i16__5_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__5_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v5, v2, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__5_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v5, v2, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -9362,22 +9362,22 @@ define void @v_shuffle_v2i16_v8i16__6_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__6_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v5, v3, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__6_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v5, v3, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -9416,21 +9416,21 @@ define void @v_shuffle_v2i16_v8i16__7_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__7_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v5, v3, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__7_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v5, v3, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -9465,18 +9465,18 @@ define void @v_shuffle_v2i16_v8i16__8_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__8_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__8_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -9509,17 +9509,17 @@ define void @v_shuffle_v2i16_v8i16__9_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__9_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__9_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -9554,18 +9554,18 @@ define void @v_shuffle_v2i16_v8i16__10_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__10_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__10_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -9598,17 +9598,17 @@ define void @v_shuffle_v2i16_v8i16__11_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__11_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__11_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v1, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -9643,18 +9643,18 @@ define void @v_shuffle_v2i16_v8i16__12_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__12_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__12_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -9687,17 +9687,17 @@ define void @v_shuffle_v2i16_v8i16__13_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__13_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__13_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -9732,18 +9732,18 @@ define void @v_shuffle_v2i16_v8i16__14_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__14_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__14_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -9774,16 +9774,16 @@ define void @v_shuffle_v2i16_v8i16__u_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__u_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__u_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -9824,22 +9824,22 @@ define void @v_shuffle_v2i16_v8i16__0_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__0_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v3 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__0_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v3 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -9880,22 +9880,22 @@ define void @v_shuffle_v2i16_v8i16__1_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__1_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__1_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -9936,22 +9936,22 @@ define void @v_shuffle_v2i16_v8i16__2_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__2_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v3 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__2_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v3 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -9992,22 +9992,22 @@ define void @v_shuffle_v2i16_v8i16__3_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__3_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__3_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -10048,22 +10048,22 @@ define void @v_shuffle_v2i16_v8i16__4_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__4_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v5 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__4_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v2, v5 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -10104,22 +10104,22 @@ define void @v_shuffle_v2i16_v8i16__5_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__5_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v5, v2, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__5_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v5, v2, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -10160,22 +10160,22 @@ define void @v_shuffle_v2i16_v8i16__6_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__6_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v5 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__6_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v5 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -10216,22 +10216,22 @@ define void @v_shuffle_v2i16_v8i16__7_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__7_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v5, v3, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__7_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v5, v3, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -10266,18 +10266,18 @@ define void @v_shuffle_v2i16_v8i16__8_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__8_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__8_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v1 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -10312,18 +10312,18 @@ define void @v_shuffle_v2i16_v8i16__9_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__9_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__9_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -10354,16 +10354,16 @@ define void @v_shuffle_v2i16_v8i16__10_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__10_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__10_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -10398,18 +10398,18 @@ define void @v_shuffle_v2i16_v8i16__11_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__11_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__11_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -10444,18 +10444,18 @@ define void @v_shuffle_v2i16_v8i16__12_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__12_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__12_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v2, v1 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -10490,18 +10490,18 @@ define void @v_shuffle_v2i16_v8i16__13_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__13_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__13_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -10536,18 +10536,18 @@ define void @v_shuffle_v2i16_v8i16__14_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__14_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__14_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v1 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -10580,17 +10580,17 @@ define void @v_shuffle_v2i16_v8i16__u_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__u_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__u_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -10631,22 +10631,22 @@ define void @v_shuffle_v2i16_v8i16__0_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__0_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v4, v0, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__0_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v4, v0, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -10685,21 +10685,21 @@ define void @v_shuffle_v2i16_v8i16__1_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__1_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v4, v0, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__1_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v4, v0, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -10740,22 +10740,22 @@ define void @v_shuffle_v2i16_v8i16__2_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__2_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v4, v1, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__2_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v4, v1, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -10794,21 +10794,21 @@ define void @v_shuffle_v2i16_v8i16__3_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__3_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v4, v1, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__3_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v4, v1, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -10849,22 +10849,22 @@ define void @v_shuffle_v2i16_v8i16__4_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__4_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v6, v2, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__4_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v6, v2, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -10903,21 +10903,21 @@ define void @v_shuffle_v2i16_v8i16__5_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__5_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v6, v2, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__5_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v6, v2, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -10958,22 +10958,22 @@ define void @v_shuffle_v2i16_v8i16__6_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__6_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v6, v3, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__6_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v6, v3, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -11012,21 +11012,21 @@ define void @v_shuffle_v2i16_v8i16__7_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__7_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v6, v3, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__7_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v6, v3, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -11061,18 +11061,18 @@ define void @v_shuffle_v2i16_v8i16__8_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__8_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__8_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -11105,17 +11105,17 @@ define void @v_shuffle_v2i16_v8i16__9_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__9_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__9_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -11150,18 +11150,18 @@ define void @v_shuffle_v2i16_v8i16__10_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__10_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__10_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -11194,17 +11194,17 @@ define void @v_shuffle_v2i16_v8i16__11_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__11_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v1, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__11_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v1, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -11239,18 +11239,18 @@ define void @v_shuffle_v2i16_v8i16__12_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__12_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__12_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -11283,17 +11283,17 @@ define void @v_shuffle_v2i16_v8i16__13_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__13_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__13_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -11328,18 +11328,18 @@ define void @v_shuffle_v2i16_v8i16__14_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__14_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__14_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -11370,16 +11370,16 @@ define void @v_shuffle_v2i16_v8i16__u_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__u_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__u_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -11420,22 +11420,22 @@ define void @v_shuffle_v2i16_v8i16__0_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__0_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v4 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__0_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v4 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -11476,22 +11476,22 @@ define void @v_shuffle_v2i16_v8i16__1_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__1_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v4, v0, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__1_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v4, v0, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -11532,22 +11532,22 @@ define void @v_shuffle_v2i16_v8i16__2_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__2_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v4 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__2_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v4 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -11588,22 +11588,22 @@ define void @v_shuffle_v2i16_v8i16__3_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__3_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v4, v1, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__3_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v4, v1, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -11644,22 +11644,22 @@ define void @v_shuffle_v2i16_v8i16__4_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__4_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v6 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__4_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v2, v6 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -11700,22 +11700,22 @@ define void @v_shuffle_v2i16_v8i16__5_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__5_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v6, v2, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__5_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v6, v2, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -11756,22 +11756,22 @@ define void @v_shuffle_v2i16_v8i16__6_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__6_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v6 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__6_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v6 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -11812,22 +11812,22 @@ define void @v_shuffle_v2i16_v8i16__7_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__7_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v6, v3, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__7_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v6, v3, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -11862,18 +11862,18 @@ define void @v_shuffle_v2i16_v8i16__8_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__8_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__8_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -11908,18 +11908,18 @@ define void @v_shuffle_v2i16_v8i16__9_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__9_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__9_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -11954,18 +11954,18 @@ define void @v_shuffle_v2i16_v8i16__10_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__10_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__10_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -12000,18 +12000,18 @@ define void @v_shuffle_v2i16_v8i16__11_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__11_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__11_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -12042,16 +12042,16 @@ define void @v_shuffle_v2i16_v8i16__12_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__12_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__12_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -12086,18 +12086,18 @@ define void @v_shuffle_v2i16_v8i16__13_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__13_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__13_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -12132,18 +12132,18 @@ define void @v_shuffle_v2i16_v8i16__14_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__14_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__14_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -12176,17 +12176,17 @@ define void @v_shuffle_v2i16_v8i16__u_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__u_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__u_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -12227,22 +12227,22 @@ define void @v_shuffle_v2i16_v8i16__0_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__0_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v5, v0, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__0_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v5, v0, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -12281,21 +12281,21 @@ define void @v_shuffle_v2i16_v8i16__1_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__1_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v5, v0, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__1_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v5, v0, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -12336,22 +12336,22 @@ define void @v_shuffle_v2i16_v8i16__2_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__2_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v5, v1, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__2_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v5, v1, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -12390,21 +12390,21 @@ define void @v_shuffle_v2i16_v8i16__3_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__3_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v5, v1, 16 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__3_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v5, v1, 16 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -12445,22 +12445,22 @@ define void @v_shuffle_v2i16_v8i16__4_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__4_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v7, v2, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__4_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v7, v2, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -12499,21 +12499,21 @@ define void @v_shuffle_v2i16_v8i16__5_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__5_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v7, v2, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__5_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v7, v2, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -12554,22 +12554,22 @@ define void @v_shuffle_v2i16_v8i16__6_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__6_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v7, v3, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__6_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v7, v3, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -12608,21 +12608,21 @@ define void @v_shuffle_v2i16_v8i16__7_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__7_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v7, v3, 16 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__7_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v7, v3, 16 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -12657,18 +12657,18 @@ define void @v_shuffle_v2i16_v8i16__8_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__8_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__8_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -12701,17 +12701,17 @@ define void @v_shuffle_v2i16_v8i16__9_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__9_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__9_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v0, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -12746,18 +12746,18 @@ define void @v_shuffle_v2i16_v8i16__10_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__10_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__10_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -12790,17 +12790,17 @@ define void @v_shuffle_v2i16_v8i16__11_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__11_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v1, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__11_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v1, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -12835,18 +12835,18 @@ define void @v_shuffle_v2i16_v8i16__12_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__12_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__12_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -12879,17 +12879,17 @@ define void @v_shuffle_v2i16_v8i16__13_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__13_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v2, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__13_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v2, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -12924,18 +12924,18 @@ define void @v_shuffle_v2i16_v8i16__14_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__14_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__14_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -12966,16 +12966,16 @@ define void @v_shuffle_v2i16_v8i16__u_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__u_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__u_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v3, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -13016,22 +13016,22 @@ define void @v_shuffle_v2i16_v8i16__0_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__0_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v5 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__0_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v5 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -13072,22 +13072,22 @@ define void @v_shuffle_v2i16_v8i16__1_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__1_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v5, v0, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__1_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v5, v0, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -13128,22 +13128,22 @@ define void @v_shuffle_v2i16_v8i16__2_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__2_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v5 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__2_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v5 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -13184,22 +13184,22 @@ define void @v_shuffle_v2i16_v8i16__3_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__3_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v5, v1, s2 -; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__3_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v5, v1, s2 +; GFX942-NEXT: global_store_dword v6, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -13240,22 +13240,22 @@ define void @v_shuffle_v2i16_v8i16__4_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__4_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v7 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__4_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v2, v7 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -13296,22 +13296,22 @@ define void @v_shuffle_v2i16_v8i16__5_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__5_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v7, v2, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__5_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v7, v2, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -13352,22 +13352,22 @@ define void @v_shuffle_v2i16_v8i16__6_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__6_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v7 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__6_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v7 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -13408,22 +13408,22 @@ define void @v_shuffle_v2i16_v8i16__7_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__7_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v7, v3, s2 -; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__7_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v7, v3, s2 +; GFX942-NEXT: global_store_dword v8, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -13458,18 +13458,18 @@ define void @v_shuffle_v2i16_v8i16__8_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__8_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__8_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -13504,18 +13504,18 @@ define void @v_shuffle_v2i16_v8i16__9_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__9_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__9_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -13550,18 +13550,18 @@ define void @v_shuffle_v2i16_v8i16__10_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__10_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__10_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -13596,18 +13596,18 @@ define void @v_shuffle_v2i16_v8i16__11_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__11_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__11_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -13642,18 +13642,18 @@ define void @v_shuffle_v2i16_v8i16__12_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__12_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__12_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v2, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -13688,18 +13688,18 @@ define void @v_shuffle_v2i16_v8i16__13_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__13_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v2, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__13_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v2, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -13730,16 +13730,16 @@ define void @v_shuffle_v2i16_v8i16__14_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i16_v8i16__14_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i16_v8i16__14_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v3, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=v"() %vec1 = call <8 x i16> asm "; def $0", "=v"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -13784,17 +13784,17 @@ define void @s_shuffle_v2i16_v8i16__0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -13826,17 +13826,17 @@ define void @s_shuffle_v2i16_v8i16__1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -13868,17 +13868,17 @@ define void @s_shuffle_v2i16_v8i16__2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -13910,17 +13910,17 @@ define void @s_shuffle_v2i16_v8i16__3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -13952,17 +13952,17 @@ define void @s_shuffle_v2i16_v8i16__4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -13994,17 +13994,17 @@ define void @s_shuffle_v2i16_v8i16__5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -14036,17 +14036,17 @@ define void @s_shuffle_v2i16_v8i16__6_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -14078,17 +14078,17 @@ define void @s_shuffle_v2i16_v8i16__7_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -14134,17 +14134,17 @@ define void @s_shuffle_v2i16_v8i16__9_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__9_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__9_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -14177,17 +14177,17 @@ define void @s_shuffle_v2i16_v8i16__10_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__10_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__10_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -14220,17 +14220,17 @@ define void @s_shuffle_v2i16_v8i16__11_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__11_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__11_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -14263,17 +14263,17 @@ define void @s_shuffle_v2i16_v8i16__12_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__12_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__12_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -14306,17 +14306,17 @@ define void @s_shuffle_v2i16_v8i16__13_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__13_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__13_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -14349,17 +14349,17 @@ define void @s_shuffle_v2i16_v8i16__14_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__14_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__14_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -14392,17 +14392,17 @@ define void @s_shuffle_v2i16_v8i16__15_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__15_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__15_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -14443,21 +14443,21 @@ define void @s_shuffle_v2i16_v8i16__15_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__15_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s7, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__15_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s7, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -14496,20 +14496,20 @@ define void @s_shuffle_v2i16_v8i16__15_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__15_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s7, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__15_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s7, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -14550,21 +14550,21 @@ define void @s_shuffle_v2i16_v8i16__15_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__15_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s7, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__15_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s7, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -14603,20 +14603,20 @@ define void @s_shuffle_v2i16_v8i16__15_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__15_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s7, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__15_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s7, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -14657,21 +14657,21 @@ define void @s_shuffle_v2i16_v8i16__15_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__15_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s7, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__15_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s7, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -14710,20 +14710,20 @@ define void @s_shuffle_v2i16_v8i16__15_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__15_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s7, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__15_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s7, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -14764,21 +14764,21 @@ define void @s_shuffle_v2i16_v8i16__15_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__15_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s7, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__15_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s7, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -14817,20 +14817,20 @@ define void @s_shuffle_v2i16_v8i16__15_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__15_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s7, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__15_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s7, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -14865,18 +14865,18 @@ define void @s_shuffle_v2i16_v8i16__15_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__15_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__15_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -14909,17 +14909,17 @@ define void @s_shuffle_v2i16_v8i16__15_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__15_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__15_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -14954,18 +14954,18 @@ define void @s_shuffle_v2i16_v8i16__15_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__15_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__15_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -14998,17 +14998,17 @@ define void @s_shuffle_v2i16_v8i16__15_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__15_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__15_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -15043,18 +15043,18 @@ define void @s_shuffle_v2i16_v8i16__15_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__15_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__15_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -15087,17 +15087,17 @@ define void @s_shuffle_v2i16_v8i16__15_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__15_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__15_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -15132,18 +15132,18 @@ define void @s_shuffle_v2i16_v8i16__15_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__15_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__15_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -15176,17 +15176,17 @@ define void @s_shuffle_v2i16_v8i16__15_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__15_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__15_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -15219,17 +15219,17 @@ define void @s_shuffle_v2i16_v8i16__u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -15261,17 +15261,17 @@ define void @s_shuffle_v2i16_v8i16__0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> zeroinitializer call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -15305,18 +15305,18 @@ define void @s_shuffle_v2i16_v8i16__1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -15348,17 +15348,17 @@ define void @s_shuffle_v2i16_v8i16__2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -15392,18 +15392,18 @@ define void @s_shuffle_v2i16_v8i16__3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -15435,17 +15435,17 @@ define void @s_shuffle_v2i16_v8i16__4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -15479,18 +15479,18 @@ define void @s_shuffle_v2i16_v8i16__5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -15522,17 +15522,17 @@ define void @s_shuffle_v2i16_v8i16__6_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -15566,18 +15566,18 @@ define void @s_shuffle_v2i16_v8i16__7_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -15609,17 +15609,17 @@ define void @s_shuffle_v2i16_v8i16__8_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__8_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__8_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -15659,21 +15659,21 @@ define void @s_shuffle_v2i16_v8i16__9_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__9_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s4, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__9_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s4, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -15712,20 +15712,20 @@ define void @s_shuffle_v2i16_v8i16__10_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__10_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s5, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__10_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s5, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -15766,21 +15766,21 @@ define void @s_shuffle_v2i16_v8i16__11_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__11_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s5, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__11_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s5, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -15819,20 +15819,20 @@ define void @s_shuffle_v2i16_v8i16__12_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__12_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s6, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__12_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s6, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -15873,21 +15873,21 @@ define void @s_shuffle_v2i16_v8i16__13_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__13_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s6, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__13_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s6, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -15926,20 +15926,20 @@ define void @s_shuffle_v2i16_v8i16__14_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__14_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s7, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__14_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s7, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -15970,17 +15970,17 @@ define void @s_shuffle_v2i16_v8i16__u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -16010,17 +16010,17 @@ define void @s_shuffle_v2i16_v8i16__0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -16052,17 +16052,17 @@ define void @s_shuffle_v2i16_v8i16__1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -16094,17 +16094,17 @@ define void @s_shuffle_v2i16_v8i16__2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -16136,17 +16136,17 @@ define void @s_shuffle_v2i16_v8i16__3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -16178,17 +16178,17 @@ define void @s_shuffle_v2i16_v8i16__4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s2, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s2, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -16220,17 +16220,17 @@ define void @s_shuffle_v2i16_v8i16__5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s2, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -16262,17 +16262,17 @@ define void @s_shuffle_v2i16_v8i16__6_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -16304,17 +16304,17 @@ define void @s_shuffle_v2i16_v8i16__7_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -16344,17 +16344,17 @@ define void @s_shuffle_v2i16_v8i16__8_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__8_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__8_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -16392,20 +16392,20 @@ define void @s_shuffle_v2i16_v8i16__9_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__9_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s4, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__9_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s4, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -16444,20 +16444,20 @@ define void @s_shuffle_v2i16_v8i16__10_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__10_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s5, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__10_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s5, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -16496,20 +16496,20 @@ define void @s_shuffle_v2i16_v8i16__11_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__11_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s5, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__11_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s5, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -16548,20 +16548,20 @@ define void @s_shuffle_v2i16_v8i16__12_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__12_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s6, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__12_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s6, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -16600,20 +16600,20 @@ define void @s_shuffle_v2i16_v8i16__13_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__13_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s6, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__13_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s6, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -16652,20 +16652,20 @@ define void @s_shuffle_v2i16_v8i16__14_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__14_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s7, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__14_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s7, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -16698,17 +16698,17 @@ define void @s_shuffle_v2i16_v8i16__u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -16740,17 +16740,17 @@ define void @s_shuffle_v2i16_v8i16__0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -16784,18 +16784,18 @@ define void @s_shuffle_v2i16_v8i16__1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -16827,17 +16827,17 @@ define void @s_shuffle_v2i16_v8i16__2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -16871,18 +16871,18 @@ define void @s_shuffle_v2i16_v8i16__3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -16914,17 +16914,17 @@ define void @s_shuffle_v2i16_v8i16__4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -16958,18 +16958,18 @@ define void @s_shuffle_v2i16_v8i16__5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -17001,17 +17001,17 @@ define void @s_shuffle_v2i16_v8i16__6_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -17045,18 +17045,18 @@ define void @s_shuffle_v2i16_v8i16__7_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -17088,17 +17088,17 @@ define void @s_shuffle_v2i16_v8i16__8_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__8_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__8_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -17138,21 +17138,21 @@ define void @s_shuffle_v2i16_v8i16__9_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__9_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s4, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__9_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s4, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -17191,20 +17191,20 @@ define void @s_shuffle_v2i16_v8i16__10_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__10_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s5, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__10_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s5, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -17245,21 +17245,21 @@ define void @s_shuffle_v2i16_v8i16__11_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__11_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s5, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__11_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s5, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -17298,20 +17298,20 @@ define void @s_shuffle_v2i16_v8i16__12_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__12_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s6, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__12_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s6, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -17352,21 +17352,21 @@ define void @s_shuffle_v2i16_v8i16__13_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__13_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s6, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__13_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s6, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -17405,20 +17405,20 @@ define void @s_shuffle_v2i16_v8i16__14_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__14_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s7, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__14_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s7, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -17451,17 +17451,17 @@ define void @s_shuffle_v2i16_v8i16__u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -17493,17 +17493,17 @@ define void @s_shuffle_v2i16_v8i16__0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -17535,17 +17535,17 @@ define void @s_shuffle_v2i16_v8i16__1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -17577,17 +17577,17 @@ define void @s_shuffle_v2i16_v8i16__2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -17619,17 +17619,17 @@ define void @s_shuffle_v2i16_v8i16__3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -17661,17 +17661,17 @@ define void @s_shuffle_v2i16_v8i16__4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s2, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s2, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -17703,17 +17703,17 @@ define void @s_shuffle_v2i16_v8i16__5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s2, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -17745,17 +17745,17 @@ define void @s_shuffle_v2i16_v8i16__6_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s3, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -17787,17 +17787,17 @@ define void @s_shuffle_v2i16_v8i16__7_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -17829,17 +17829,17 @@ define void @s_shuffle_v2i16_v8i16__8_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__8_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__8_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -17877,20 +17877,20 @@ define void @s_shuffle_v2i16_v8i16__9_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__9_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s4, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__9_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s4, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -17929,20 +17929,20 @@ define void @s_shuffle_v2i16_v8i16__10_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__10_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s5, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__10_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s5, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -17981,20 +17981,20 @@ define void @s_shuffle_v2i16_v8i16__11_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__11_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s5, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__11_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s5, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -18033,20 +18033,20 @@ define void @s_shuffle_v2i16_v8i16__12_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__12_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s6, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__12_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s6, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -18085,20 +18085,20 @@ define void @s_shuffle_v2i16_v8i16__13_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__13_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s6, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__13_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s6, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -18137,20 +18137,20 @@ define void @s_shuffle_v2i16_v8i16__14_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__14_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s7, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__14_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s7, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -18183,17 +18183,17 @@ define void @s_shuffle_v2i16_v8i16__u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -18225,17 +18225,17 @@ define void @s_shuffle_v2i16_v8i16__0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -18269,18 +18269,18 @@ define void @s_shuffle_v2i16_v8i16__1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -18312,17 +18312,17 @@ define void @s_shuffle_v2i16_v8i16__2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -18356,18 +18356,18 @@ define void @s_shuffle_v2i16_v8i16__3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -18399,17 +18399,17 @@ define void @s_shuffle_v2i16_v8i16__4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -18443,18 +18443,18 @@ define void @s_shuffle_v2i16_v8i16__5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -18486,17 +18486,17 @@ define void @s_shuffle_v2i16_v8i16__6_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -18530,18 +18530,18 @@ define void @s_shuffle_v2i16_v8i16__7_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -18573,17 +18573,17 @@ define void @s_shuffle_v2i16_v8i16__8_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__8_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__8_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -18623,21 +18623,21 @@ define void @s_shuffle_v2i16_v8i16__9_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__9_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s4, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__9_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s4, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -18676,20 +18676,20 @@ define void @s_shuffle_v2i16_v8i16__10_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__10_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s5, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__10_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s5, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -18730,21 +18730,21 @@ define void @s_shuffle_v2i16_v8i16__11_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__11_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s5, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__11_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s5, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -18783,20 +18783,20 @@ define void @s_shuffle_v2i16_v8i16__12_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__12_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s6, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__12_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s6, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -18837,21 +18837,21 @@ define void @s_shuffle_v2i16_v8i16__13_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__13_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s6, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__13_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s6, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -18890,20 +18890,20 @@ define void @s_shuffle_v2i16_v8i16__14_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__14_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s7, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__14_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s7, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -18936,17 +18936,17 @@ define void @s_shuffle_v2i16_v8i16__u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -18978,17 +18978,17 @@ define void @s_shuffle_v2i16_v8i16__0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -19020,17 +19020,17 @@ define void @s_shuffle_v2i16_v8i16__1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -19062,17 +19062,17 @@ define void @s_shuffle_v2i16_v8i16__2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s1, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -19104,17 +19104,17 @@ define void @s_shuffle_v2i16_v8i16__3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -19146,17 +19146,17 @@ define void @s_shuffle_v2i16_v8i16__4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -19188,17 +19188,17 @@ define void @s_shuffle_v2i16_v8i16__5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -19230,17 +19230,17 @@ define void @s_shuffle_v2i16_v8i16__6_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s3, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -19272,17 +19272,17 @@ define void @s_shuffle_v2i16_v8i16__7_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -19314,17 +19314,17 @@ define void @s_shuffle_v2i16_v8i16__8_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__8_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__8_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -19362,20 +19362,20 @@ define void @s_shuffle_v2i16_v8i16__9_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__9_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s4, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__9_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s4, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -19414,20 +19414,20 @@ define void @s_shuffle_v2i16_v8i16__10_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__10_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s5, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__10_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s5, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -19466,20 +19466,20 @@ define void @s_shuffle_v2i16_v8i16__11_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__11_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s5, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__11_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s5, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -19518,20 +19518,20 @@ define void @s_shuffle_v2i16_v8i16__12_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__12_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s6, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__12_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s6, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -19570,20 +19570,20 @@ define void @s_shuffle_v2i16_v8i16__13_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__13_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s6, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__13_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s6, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -19622,20 +19622,20 @@ define void @s_shuffle_v2i16_v8i16__14_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__14_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s7, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__14_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s7, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -19668,17 +19668,17 @@ define void @s_shuffle_v2i16_v8i16__u_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -19710,17 +19710,17 @@ define void @s_shuffle_v2i16_v8i16__0_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -19754,18 +19754,18 @@ define void @s_shuffle_v2i16_v8i16__1_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -19797,17 +19797,17 @@ define void @s_shuffle_v2i16_v8i16__2_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -19841,18 +19841,18 @@ define void @s_shuffle_v2i16_v8i16__3_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -19884,17 +19884,17 @@ define void @s_shuffle_v2i16_v8i16__4_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -19928,18 +19928,18 @@ define void @s_shuffle_v2i16_v8i16__5_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -19971,17 +19971,17 @@ define void @s_shuffle_v2i16_v8i16__6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -20015,18 +20015,18 @@ define void @s_shuffle_v2i16_v8i16__7_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -20058,17 +20058,17 @@ define void @s_shuffle_v2i16_v8i16__8_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__8_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__8_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -20108,21 +20108,21 @@ define void @s_shuffle_v2i16_v8i16__9_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__9_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s4, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__9_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s4, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -20161,20 +20161,20 @@ define void @s_shuffle_v2i16_v8i16__10_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__10_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s5, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__10_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s5, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -20215,21 +20215,21 @@ define void @s_shuffle_v2i16_v8i16__11_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__11_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s5, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__11_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s5, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -20268,20 +20268,20 @@ define void @s_shuffle_v2i16_v8i16__12_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__12_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s6, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__12_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s6, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -20322,21 +20322,21 @@ define void @s_shuffle_v2i16_v8i16__13_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__13_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s6, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__13_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s6, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -20375,20 +20375,20 @@ define void @s_shuffle_v2i16_v8i16__14_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__14_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s7, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__14_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s7, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -20421,17 +20421,17 @@ define void @s_shuffle_v2i16_v8i16__u_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -20463,17 +20463,17 @@ define void @s_shuffle_v2i16_v8i16__0_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -20505,17 +20505,17 @@ define void @s_shuffle_v2i16_v8i16__1_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -20547,17 +20547,17 @@ define void @s_shuffle_v2i16_v8i16__2_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s1, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -20589,17 +20589,17 @@ define void @s_shuffle_v2i16_v8i16__3_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -20631,17 +20631,17 @@ define void @s_shuffle_v2i16_v8i16__4_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s2, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s2, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -20673,17 +20673,17 @@ define void @s_shuffle_v2i16_v8i16__5_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s2, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -20715,17 +20715,17 @@ define void @s_shuffle_v2i16_v8i16__6_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -20757,17 +20757,17 @@ define void @s_shuffle_v2i16_v8i16__7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -20799,17 +20799,17 @@ define void @s_shuffle_v2i16_v8i16__8_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__8_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__8_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -20847,20 +20847,20 @@ define void @s_shuffle_v2i16_v8i16__9_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__9_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s4, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__9_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s4, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -20899,20 +20899,20 @@ define void @s_shuffle_v2i16_v8i16__10_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__10_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s5, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__10_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s5, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -20951,20 +20951,20 @@ define void @s_shuffle_v2i16_v8i16__11_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__11_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s5, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__11_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s5, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -21003,20 +21003,20 @@ define void @s_shuffle_v2i16_v8i16__12_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__12_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s6, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__12_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s6, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -21055,20 +21055,20 @@ define void @s_shuffle_v2i16_v8i16__13_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__13_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s6, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__13_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s6, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -21107,20 +21107,20 @@ define void @s_shuffle_v2i16_v8i16__14_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__14_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s7, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__14_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s7, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -21165,17 +21165,17 @@ define void @s_shuffle_v2i16_v8i16__0_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__0_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__0_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -21207,17 +21207,17 @@ define void @s_shuffle_v2i16_v8i16__1_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__1_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__1_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -21249,17 +21249,17 @@ define void @s_shuffle_v2i16_v8i16__2_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__2_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__2_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -21291,17 +21291,17 @@ define void @s_shuffle_v2i16_v8i16__3_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__3_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__3_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -21333,17 +21333,17 @@ define void @s_shuffle_v2i16_v8i16__4_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__4_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__4_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -21375,17 +21375,17 @@ define void @s_shuffle_v2i16_v8i16__5_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__5_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__5_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -21417,17 +21417,17 @@ define void @s_shuffle_v2i16_v8i16__6_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__6_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__6_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -21459,17 +21459,17 @@ define void @s_shuffle_v2i16_v8i16__7_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__7_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__7_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) @@ -21517,18 +21517,18 @@ define void @s_shuffle_v2i16_v8i16__9_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__9_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__9_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -21561,17 +21561,17 @@ define void @s_shuffle_v2i16_v8i16__10_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__10_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__10_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -21606,18 +21606,18 @@ define void @s_shuffle_v2i16_v8i16__11_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__11_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__11_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -21650,17 +21650,17 @@ define void @s_shuffle_v2i16_v8i16__12_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__12_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__12_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -21695,18 +21695,18 @@ define void @s_shuffle_v2i16_v8i16__13_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__13_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__13_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -21739,17 +21739,17 @@ define void @s_shuffle_v2i16_v8i16__14_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__14_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__14_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -21780,17 +21780,17 @@ define void @s_shuffle_v2i16_v8i16__u_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__u_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__u_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -21829,20 +21829,20 @@ define void @s_shuffle_v2i16_v8i16__0_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__0_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__0_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s0, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -21881,20 +21881,20 @@ define void @s_shuffle_v2i16_v8i16__1_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__1_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__1_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -21933,20 +21933,20 @@ define void @s_shuffle_v2i16_v8i16__2_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__2_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__2_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s1, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -21985,20 +21985,20 @@ define void @s_shuffle_v2i16_v8i16__3_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__3_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__3_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -22037,20 +22037,20 @@ define void @s_shuffle_v2i16_v8i16__4_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__4_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s2, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__4_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s2, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -22089,20 +22089,20 @@ define void @s_shuffle_v2i16_v8i16__5_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__5_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__5_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s2, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -22141,20 +22141,20 @@ define void @s_shuffle_v2i16_v8i16__6_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__6_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__6_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s3, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -22193,20 +22193,20 @@ define void @s_shuffle_v2i16_v8i16__7_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__7_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__7_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -22237,17 +22237,17 @@ define void @s_shuffle_v2i16_v8i16__8_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__8_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__8_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -22280,17 +22280,17 @@ define void @s_shuffle_v2i16_v8i16__9_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__9_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__9_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -22323,17 +22323,17 @@ define void @s_shuffle_v2i16_v8i16__10_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__10_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__10_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -22366,17 +22366,17 @@ define void @s_shuffle_v2i16_v8i16__11_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__11_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__11_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -22409,17 +22409,17 @@ define void @s_shuffle_v2i16_v8i16__12_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__12_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s2, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__12_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s2, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -22452,17 +22452,17 @@ define void @s_shuffle_v2i16_v8i16__13_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__13_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__13_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s2, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -22495,17 +22495,17 @@ define void @s_shuffle_v2i16_v8i16__14_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__14_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__14_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -22538,17 +22538,17 @@ define void @s_shuffle_v2i16_v8i16__u_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__u_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__u_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -22587,20 +22587,20 @@ define void @s_shuffle_v2i16_v8i16__0_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__0_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__0_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -22641,21 +22641,21 @@ define void @s_shuffle_v2i16_v8i16__1_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__1_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__1_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -22694,20 +22694,20 @@ define void @s_shuffle_v2i16_v8i16__2_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__2_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__2_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -22748,21 +22748,21 @@ define void @s_shuffle_v2i16_v8i16__3_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__3_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__3_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -22801,20 +22801,20 @@ define void @s_shuffle_v2i16_v8i16__4_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__4_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__4_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -22855,21 +22855,21 @@ define void @s_shuffle_v2i16_v8i16__5_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__5_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__5_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -22908,20 +22908,20 @@ define void @s_shuffle_v2i16_v8i16__6_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__6_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__6_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -22962,21 +22962,21 @@ define void @s_shuffle_v2i16_v8i16__7_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__7_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__7_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -23009,17 +23009,17 @@ define void @s_shuffle_v2i16_v8i16__8_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__8_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__8_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -23054,18 +23054,18 @@ define void @s_shuffle_v2i16_v8i16__9_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__9_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__9_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -23098,17 +23098,17 @@ define void @s_shuffle_v2i16_v8i16__10_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__10_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__10_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -23143,18 +23143,18 @@ define void @s_shuffle_v2i16_v8i16__11_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__11_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__11_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -23187,17 +23187,17 @@ define void @s_shuffle_v2i16_v8i16__12_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__12_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__12_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -23232,18 +23232,18 @@ define void @s_shuffle_v2i16_v8i16__13_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__13_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__13_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -23276,17 +23276,17 @@ define void @s_shuffle_v2i16_v8i16__14_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__14_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__14_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -23319,17 +23319,17 @@ define void @s_shuffle_v2i16_v8i16__u_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__u_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__u_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -23368,20 +23368,20 @@ define void @s_shuffle_v2i16_v8i16__0_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__0_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__0_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s0, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -23420,20 +23420,20 @@ define void @s_shuffle_v2i16_v8i16__1_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__1_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__1_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -23472,20 +23472,20 @@ define void @s_shuffle_v2i16_v8i16__2_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__2_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__2_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s1, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -23524,20 +23524,20 @@ define void @s_shuffle_v2i16_v8i16__3_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__3_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__3_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -23576,20 +23576,20 @@ define void @s_shuffle_v2i16_v8i16__4_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__4_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s2, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__4_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s2, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -23628,20 +23628,20 @@ define void @s_shuffle_v2i16_v8i16__5_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__5_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__5_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s2, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -23680,20 +23680,20 @@ define void @s_shuffle_v2i16_v8i16__6_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__6_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__6_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s3, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -23732,20 +23732,20 @@ define void @s_shuffle_v2i16_v8i16__7_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__7_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__7_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -23778,17 +23778,17 @@ define void @s_shuffle_v2i16_v8i16__8_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__8_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__8_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -23821,17 +23821,17 @@ define void @s_shuffle_v2i16_v8i16__9_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__9_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__9_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -23864,17 +23864,17 @@ define void @s_shuffle_v2i16_v8i16__10_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__10_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__10_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -23907,17 +23907,17 @@ define void @s_shuffle_v2i16_v8i16__11_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__11_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__11_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -23950,17 +23950,17 @@ define void @s_shuffle_v2i16_v8i16__12_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__12_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s2, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__12_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s2, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -23993,17 +23993,17 @@ define void @s_shuffle_v2i16_v8i16__13_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__13_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__13_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s2, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -24036,17 +24036,17 @@ define void @s_shuffle_v2i16_v8i16__14_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__14_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__14_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s3, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -24079,17 +24079,17 @@ define void @s_shuffle_v2i16_v8i16__u_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__u_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__u_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -24128,20 +24128,20 @@ define void @s_shuffle_v2i16_v8i16__0_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__0_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__0_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -24182,21 +24182,21 @@ define void @s_shuffle_v2i16_v8i16__1_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__1_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__1_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -24235,20 +24235,20 @@ define void @s_shuffle_v2i16_v8i16__2_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__2_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__2_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -24289,21 +24289,21 @@ define void @s_shuffle_v2i16_v8i16__3_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__3_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__3_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -24342,20 +24342,20 @@ define void @s_shuffle_v2i16_v8i16__4_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__4_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__4_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -24396,21 +24396,21 @@ define void @s_shuffle_v2i16_v8i16__5_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__5_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__5_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -24449,20 +24449,20 @@ define void @s_shuffle_v2i16_v8i16__6_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__6_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__6_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -24503,21 +24503,21 @@ define void @s_shuffle_v2i16_v8i16__7_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__7_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__7_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -24550,17 +24550,17 @@ define void @s_shuffle_v2i16_v8i16__8_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__8_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__8_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -24595,18 +24595,18 @@ define void @s_shuffle_v2i16_v8i16__9_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__9_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__9_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -24639,17 +24639,17 @@ define void @s_shuffle_v2i16_v8i16__10_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__10_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__10_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -24684,18 +24684,18 @@ define void @s_shuffle_v2i16_v8i16__11_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__11_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__11_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -24728,17 +24728,17 @@ define void @s_shuffle_v2i16_v8i16__12_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__12_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__12_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -24773,18 +24773,18 @@ define void @s_shuffle_v2i16_v8i16__13_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__13_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__13_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -24817,17 +24817,17 @@ define void @s_shuffle_v2i16_v8i16__14_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__14_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__14_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -24860,17 +24860,17 @@ define void @s_shuffle_v2i16_v8i16__u_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__u_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__u_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -24909,20 +24909,20 @@ define void @s_shuffle_v2i16_v8i16__0_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__0_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__0_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s0, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -24961,20 +24961,20 @@ define void @s_shuffle_v2i16_v8i16__1_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__1_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__1_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -25013,20 +25013,20 @@ define void @s_shuffle_v2i16_v8i16__2_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__2_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__2_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s1, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -25065,20 +25065,20 @@ define void @s_shuffle_v2i16_v8i16__3_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__3_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__3_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -25117,20 +25117,20 @@ define void @s_shuffle_v2i16_v8i16__4_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__4_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s2, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__4_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s2, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -25169,20 +25169,20 @@ define void @s_shuffle_v2i16_v8i16__5_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__5_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__5_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s2, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -25221,20 +25221,20 @@ define void @s_shuffle_v2i16_v8i16__6_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__6_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__6_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s3, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -25273,20 +25273,20 @@ define void @s_shuffle_v2i16_v8i16__7_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__7_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__7_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -25319,17 +25319,17 @@ define void @s_shuffle_v2i16_v8i16__8_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__8_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__8_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -25362,17 +25362,17 @@ define void @s_shuffle_v2i16_v8i16__9_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__9_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__9_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -25405,17 +25405,17 @@ define void @s_shuffle_v2i16_v8i16__10_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__10_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__10_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s1, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -25448,17 +25448,17 @@ define void @s_shuffle_v2i16_v8i16__11_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__11_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__11_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -25491,17 +25491,17 @@ define void @s_shuffle_v2i16_v8i16__12_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__12_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__12_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -25534,17 +25534,17 @@ define void @s_shuffle_v2i16_v8i16__13_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__13_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__13_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -25577,17 +25577,17 @@ define void @s_shuffle_v2i16_v8i16__14_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__14_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__14_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s3, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -25620,17 +25620,17 @@ define void @s_shuffle_v2i16_v8i16__u_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__u_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__u_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -25669,20 +25669,20 @@ define void @s_shuffle_v2i16_v8i16__0_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__0_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__0_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -25723,21 +25723,21 @@ define void @s_shuffle_v2i16_v8i16__1_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__1_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__1_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -25776,20 +25776,20 @@ define void @s_shuffle_v2i16_v8i16__2_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__2_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__2_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -25830,21 +25830,21 @@ define void @s_shuffle_v2i16_v8i16__3_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__3_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__3_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -25883,20 +25883,20 @@ define void @s_shuffle_v2i16_v8i16__4_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__4_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__4_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -25937,21 +25937,21 @@ define void @s_shuffle_v2i16_v8i16__5_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__5_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__5_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -25990,20 +25990,20 @@ define void @s_shuffle_v2i16_v8i16__6_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__6_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__6_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -26044,21 +26044,21 @@ define void @s_shuffle_v2i16_v8i16__7_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__7_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__7_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -26091,17 +26091,17 @@ define void @s_shuffle_v2i16_v8i16__8_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__8_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__8_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -26136,18 +26136,18 @@ define void @s_shuffle_v2i16_v8i16__9_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__9_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__9_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -26180,17 +26180,17 @@ define void @s_shuffle_v2i16_v8i16__10_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__10_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__10_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -26225,18 +26225,18 @@ define void @s_shuffle_v2i16_v8i16__11_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__11_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__11_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -26269,17 +26269,17 @@ define void @s_shuffle_v2i16_v8i16__12_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__12_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__12_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -26314,18 +26314,18 @@ define void @s_shuffle_v2i16_v8i16__13_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__13_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__13_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -26358,17 +26358,17 @@ define void @s_shuffle_v2i16_v8i16__14_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__14_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__14_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -26401,17 +26401,17 @@ define void @s_shuffle_v2i16_v8i16__u_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__u_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__u_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -26450,20 +26450,20 @@ define void @s_shuffle_v2i16_v8i16__0_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__0_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__0_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s0, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -26502,20 +26502,20 @@ define void @s_shuffle_v2i16_v8i16__1_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__1_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__1_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -26554,20 +26554,20 @@ define void @s_shuffle_v2i16_v8i16__2_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__2_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__2_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s1, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -26606,20 +26606,20 @@ define void @s_shuffle_v2i16_v8i16__3_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__3_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__3_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -26658,20 +26658,20 @@ define void @s_shuffle_v2i16_v8i16__4_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__4_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s2, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__4_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s2, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -26710,20 +26710,20 @@ define void @s_shuffle_v2i16_v8i16__5_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__5_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__5_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s2, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -26762,20 +26762,20 @@ define void @s_shuffle_v2i16_v8i16__6_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__6_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__6_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s3, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -26814,20 +26814,20 @@ define void @s_shuffle_v2i16_v8i16__7_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__7_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__7_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -26860,17 +26860,17 @@ define void @s_shuffle_v2i16_v8i16__8_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__8_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__8_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -26903,17 +26903,17 @@ define void @s_shuffle_v2i16_v8i16__9_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__9_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__9_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -26946,17 +26946,17 @@ define void @s_shuffle_v2i16_v8i16__10_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__10_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__10_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s1, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -26989,17 +26989,17 @@ define void @s_shuffle_v2i16_v8i16__11_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__11_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__11_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -27032,17 +27032,17 @@ define void @s_shuffle_v2i16_v8i16__12_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__12_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s2, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__12_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s2, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -27075,17 +27075,17 @@ define void @s_shuffle_v2i16_v8i16__13_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__13_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__13_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s2, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> @@ -27118,17 +27118,17 @@ define void @s_shuffle_v2i16_v8i16__14_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i16_v8i16__14_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i16_v8i16__14_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i16> asm "; def $0", "=s"() %vec1 = call <8 x i16> asm "; def $0", "=s"() %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v2i32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v2i32.ll index 2d27d7199ddf4..676a521757bd8 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v2i32.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v2i32.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v2i32_v2i32__u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v2i32_v2i32__0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v2i32__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v2i32__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -79,17 +79,17 @@ define void @v_shuffle_v2i32_v2i32__1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v2i32__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v2i32__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -132,17 +132,17 @@ define void @v_shuffle_v2i32_v2i32__3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v2i32__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v2i32__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <2 x i32> @@ -182,21 +182,21 @@ define void @v_shuffle_v2i32_v2i32__3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v2i32__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v2i32__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <2 x i32> @@ -235,21 +235,21 @@ define void @v_shuffle_v2i32_v2i32__3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v2i32__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v2i32__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <2 x i32> @@ -282,17 +282,17 @@ define void @v_shuffle_v2i32_v2i32__3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v2i32__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v2i32__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <2 x i32> @@ -325,17 +325,17 @@ define void @v_shuffle_v2i32_v2i32__3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v2i32__3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v2i32__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <2 x i32> @@ -368,17 +368,17 @@ define void @v_shuffle_v2i32_v2i32__u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v2i32__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v2i32__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -410,17 +410,17 @@ define void @v_shuffle_v2i32_v2i32__0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v2i32__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v2i32__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <2 x i32> zeroinitializer store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -452,17 +452,17 @@ define void @v_shuffle_v2i32_v2i32__1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v2i32__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v2i32__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -494,17 +494,17 @@ define void @v_shuffle_v2i32_v2i32__2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v2i32__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v2i32__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -534,16 +534,16 @@ define void @v_shuffle_v2i32_v2i32__u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v2i32__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v2i32__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -573,16 +573,16 @@ define void @v_shuffle_v2i32_v2i32__0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v2i32__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v2i32__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -614,17 +614,17 @@ define void @v_shuffle_v2i32_v2i32__1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v2i32__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v2i32__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -654,16 +654,16 @@ define void @v_shuffle_v2i32_v2i32__2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v2i32__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v2i32__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -704,16 +704,16 @@ define void @v_shuffle_v2i32_v2i32__0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v2i32__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v2i32__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -745,17 +745,17 @@ define void @v_shuffle_v2i32_v2i32__1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v2i32__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v2i32__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -796,16 +796,16 @@ define void @v_shuffle_v2i32_v2i32__u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v2i32__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v2i32__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <2 x i32> @@ -844,21 +844,21 @@ define void @v_shuffle_v2i32_v2i32__0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v2i32__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v2i32__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <2 x i32> @@ -897,21 +897,21 @@ define void @v_shuffle_v2i32_v2i32__1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v2i32__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v2i32__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <2 x i32> @@ -942,16 +942,16 @@ define void @v_shuffle_v2i32_v2i32__2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v2i32__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v2i32__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <2 x i32> @@ -996,17 +996,17 @@ define void @s_shuffle_v2i32_v2i32__0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v2i32__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v2i32__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -1038,17 +1038,17 @@ define void @s_shuffle_v2i32_v2i32__1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v2i32__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v2i32__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -1094,17 +1094,17 @@ define void @s_shuffle_v2i32_v2i32__3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v2i32__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v2i32__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <2 x i32> @@ -1145,21 +1145,21 @@ define void @s_shuffle_v2i32_v2i32__3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v2i32__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v2i32__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <2 x i32> @@ -1198,20 +1198,20 @@ define void @s_shuffle_v2i32_v2i32__3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v2i32__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v2i32__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <2 x i32> @@ -1246,18 +1246,18 @@ define void @s_shuffle_v2i32_v2i32__3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v2i32__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v2i32__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <2 x i32> @@ -1309,17 +1309,17 @@ define void @s_shuffle_v2i32_v2i32__u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v2i32__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v2i32__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -1371,18 +1371,18 @@ define void @s_shuffle_v2i32_v2i32__1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v2i32__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v2i32__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -1414,17 +1414,17 @@ define void @s_shuffle_v2i32_v2i32__2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v2i32__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v2i32__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -1454,17 +1454,17 @@ define void @s_shuffle_v2i32_v2i32__u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v2i32__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v2i32__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -1494,17 +1494,17 @@ define void @s_shuffle_v2i32_v2i32__0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v2i32__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v2i32__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -1552,17 +1552,17 @@ define void @s_shuffle_v2i32_v2i32__2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v2i32__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v2i32__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -1606,17 +1606,17 @@ define void @s_shuffle_v2i32_v2i32__0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v2i32__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v2i32__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -1648,17 +1648,17 @@ define void @s_shuffle_v2i32_v2i32__1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v2i32__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v2i32__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -1702,17 +1702,17 @@ define void @s_shuffle_v2i32_v2i32__u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v2i32__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v2i32__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <2 x i32> @@ -1751,20 +1751,20 @@ define void @s_shuffle_v2i32_v2i32__0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v2i32__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v2i32__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <2 x i32> @@ -1803,20 +1803,20 @@ define void @s_shuffle_v2i32_v2i32__1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v2i32__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v2i32__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <2 x i32> @@ -1847,17 +1847,17 @@ define void @s_shuffle_v2i32_v2i32__2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v2i32__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v2i32__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <2 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v3i32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v3i32.ll index ea08df2e4f50f..f65340470feb1 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v3i32.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v3i32.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v2i32_v3i32__u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v2i32_v3i32__0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -78,17 +78,17 @@ define void @v_shuffle_v2i32_v3i32__1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -120,17 +120,17 @@ define void @v_shuffle_v2i32_v3i32__2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -172,17 +172,17 @@ define void @v_shuffle_v2i32_v3i32__4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -215,17 +215,17 @@ define void @v_shuffle_v2i32_v3i32__5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -266,22 +266,22 @@ define void @v_shuffle_v2i32_v3i32__5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v5, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v5, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -320,21 +320,21 @@ define void @v_shuffle_v2i32_v3i32__5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -374,21 +374,21 @@ define void @v_shuffle_v2i32_v3i32__5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -422,18 +422,18 @@ define void @v_shuffle_v2i32_v3i32__5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -466,17 +466,17 @@ define void @v_shuffle_v2i32_v3i32__5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -510,18 +510,18 @@ define void @v_shuffle_v2i32_v3i32__5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -554,17 +554,17 @@ define void @v_shuffle_v2i32_v3i32__u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -596,17 +596,17 @@ define void @v_shuffle_v2i32_v3i32__0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <2 x i32> zeroinitializer store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -638,17 +638,17 @@ define void @v_shuffle_v2i32_v3i32__1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -681,18 +681,18 @@ define void @v_shuffle_v2i32_v3i32__2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -724,17 +724,17 @@ define void @v_shuffle_v2i32_v3i32__3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -772,21 +772,21 @@ define void @v_shuffle_v2i32_v3i32__4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -817,16 +817,16 @@ define void @v_shuffle_v2i32_v3i32__u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -856,16 +856,16 @@ define void @v_shuffle_v2i32_v3i32__0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -897,17 +897,17 @@ define void @v_shuffle_v2i32_v3i32__1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -939,17 +939,17 @@ define void @v_shuffle_v2i32_v3i32__2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -979,16 +979,16 @@ define void @v_shuffle_v2i32_v3i32__3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1026,21 +1026,21 @@ define void @v_shuffle_v2i32_v3i32__4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -1072,17 +1072,17 @@ define void @v_shuffle_v2i32_v3i32__u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1114,17 +1114,17 @@ define void @v_shuffle_v2i32_v3i32__0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1156,18 +1156,18 @@ define void @v_shuffle_v2i32_v3i32__1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1200,18 +1200,18 @@ define void @v_shuffle_v2i32_v3i32__2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1242,17 +1242,17 @@ define void @v_shuffle_v2i32_v3i32__3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1291,21 +1291,21 @@ define void @v_shuffle_v2i32_v3i32__4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -1347,16 +1347,16 @@ define void @v_shuffle_v2i32_v3i32__0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1387,17 +1387,17 @@ define void @v_shuffle_v2i32_v3i32__1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1429,17 +1429,17 @@ define void @v_shuffle_v2i32_v3i32__2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1482,17 +1482,17 @@ define void @v_shuffle_v2i32_v3i32__4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -1523,16 +1523,16 @@ define void @v_shuffle_v2i32_v3i32__u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -1571,21 +1571,21 @@ define void @v_shuffle_v2i32_v3i32__0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -1624,21 +1624,21 @@ define void @v_shuffle_v2i32_v3i32__1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v5, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v5, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -1677,21 +1677,21 @@ define void @v_shuffle_v2i32_v3i32__2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -1722,16 +1722,16 @@ define void @v_shuffle_v2i32_v3i32__3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -1764,17 +1764,17 @@ define void @v_shuffle_v2i32_v3i32__4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -1806,17 +1806,17 @@ define void @v_shuffle_v2i32_v3i32__u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -1855,21 +1855,21 @@ define void @v_shuffle_v2i32_v3i32__0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -1909,21 +1909,21 @@ define void @v_shuffle_v2i32_v3i32__1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -1963,21 +1963,21 @@ define void @v_shuffle_v2i32_v3i32__2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -2010,17 +2010,17 @@ define void @v_shuffle_v2i32_v3i32__3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -2053,18 +2053,18 @@ define void @v_shuffle_v2i32_v3i32__4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v3i32__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v3i32__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -2109,17 +2109,17 @@ define void @s_shuffle_v2i32_v3i32__0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v3i32__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v3i32__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -2151,17 +2151,17 @@ define void @s_shuffle_v2i32_v3i32__1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v3i32__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v3i32__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -2193,17 +2193,17 @@ define void @s_shuffle_v2i32_v3i32__2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v3i32__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v3i32__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -2249,17 +2249,17 @@ define void @s_shuffle_v2i32_v3i32__4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v3i32__4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v3i32__4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -2292,17 +2292,17 @@ define void @s_shuffle_v2i32_v3i32__5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v3i32__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v3i32__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -2343,21 +2343,21 @@ define void @s_shuffle_v2i32_v3i32__5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v3i32__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v3i32__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -2396,20 +2396,20 @@ define void @s_shuffle_v2i32_v3i32__5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v3i32__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v3i32__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -2450,21 +2450,21 @@ define void @s_shuffle_v2i32_v3i32__5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v3i32__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v3i32__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -2499,18 +2499,18 @@ define void @s_shuffle_v2i32_v3i32__5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v3i32__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v3i32__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -2564,18 +2564,18 @@ define void @s_shuffle_v2i32_v3i32__5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v3i32__5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v3i32__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -2608,17 +2608,17 @@ define void @s_shuffle_v2i32_v3i32__u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v3i32__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v3i32__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -2670,18 +2670,18 @@ define void @s_shuffle_v2i32_v3i32__1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v3i32__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v3i32__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -2715,18 +2715,18 @@ define void @s_shuffle_v2i32_v3i32__2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v3i32__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v3i32__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -2758,17 +2758,17 @@ define void @s_shuffle_v2i32_v3i32__3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v3i32__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v3i32__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -2808,21 +2808,21 @@ define void @s_shuffle_v2i32_v3i32__4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v3i32__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v3i32__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -2853,17 +2853,17 @@ define void @s_shuffle_v2i32_v3i32__u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v3i32__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v3i32__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -2893,17 +2893,17 @@ define void @s_shuffle_v2i32_v3i32__0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v3i32__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v3i32__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -2969,17 +2969,17 @@ define void @s_shuffle_v2i32_v3i32__3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v3i32__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v3i32__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -3017,20 +3017,20 @@ define void @s_shuffle_v2i32_v3i32__4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v3i32__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v3i32__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -3063,17 +3063,17 @@ define void @s_shuffle_v2i32_v3i32__u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v3i32__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v3i32__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -3125,18 +3125,18 @@ define void @s_shuffle_v2i32_v3i32__1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v3i32__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v3i32__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -3170,18 +3170,18 @@ define void @s_shuffle_v2i32_v3i32__2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v3i32__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v3i32__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -3213,17 +3213,17 @@ define void @s_shuffle_v2i32_v3i32__3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v3i32__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v3i32__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -3263,21 +3263,21 @@ define void @s_shuffle_v2i32_v3i32__4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v3i32__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v3i32__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -3322,17 +3322,17 @@ define void @s_shuffle_v2i32_v3i32__0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v3i32__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v3i32__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -3364,17 +3364,17 @@ define void @s_shuffle_v2i32_v3i32__1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v3i32__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v3i32__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -3406,17 +3406,17 @@ define void @s_shuffle_v2i32_v3i32__2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v3i32__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v3i32__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -3464,18 +3464,18 @@ define void @s_shuffle_v2i32_v3i32__4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v3i32__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v3i32__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -3506,17 +3506,17 @@ define void @s_shuffle_v2i32_v3i32__u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v3i32__u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v3i32__u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -3555,20 +3555,20 @@ define void @s_shuffle_v2i32_v3i32__0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v3i32__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v3i32__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -3607,20 +3607,20 @@ define void @s_shuffle_v2i32_v3i32__1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v3i32__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v3i32__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -3659,20 +3659,20 @@ define void @s_shuffle_v2i32_v3i32__2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v3i32__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v3i32__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -3703,17 +3703,17 @@ define void @s_shuffle_v2i32_v3i32__3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v3i32__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v3i32__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -3765,17 +3765,17 @@ define void @s_shuffle_v2i32_v3i32__u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v3i32__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v3i32__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -3814,20 +3814,20 @@ define void @s_shuffle_v2i32_v3i32__0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v3i32__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v3i32__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -3868,21 +3868,21 @@ define void @s_shuffle_v2i32_v3i32__1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v3i32__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v3i32__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -3923,21 +3923,21 @@ define void @s_shuffle_v2i32_v3i32__2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v3i32__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v3i32__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> @@ -3991,18 +3991,18 @@ define void @s_shuffle_v2i32_v3i32__4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v3i32__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v3i32__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <2 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v4i32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v4i32.ll index a2431d56ce2fd..37df1b6a72e03 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v4i32.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v4i32.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v2i32_v4i32__u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v2i32_v4i32__0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -78,17 +78,17 @@ define void @v_shuffle_v2i32_v4i32__1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -118,16 +118,16 @@ define void @v_shuffle_v2i32_v4i32__2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -159,17 +159,17 @@ define void @v_shuffle_v2i32_v4i32__3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -211,17 +211,17 @@ define void @v_shuffle_v2i32_v4i32__5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -252,16 +252,16 @@ define void @v_shuffle_v2i32_v4i32__6_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -294,17 +294,17 @@ define void @v_shuffle_v2i32_v4i32__7_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -344,21 +344,21 @@ define void @v_shuffle_v2i32_v4i32__7_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -397,21 +397,21 @@ define void @v_shuffle_v2i32_v4i32__7_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -450,21 +450,21 @@ define void @v_shuffle_v2i32_v4i32__7_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -503,21 +503,21 @@ define void @v_shuffle_v2i32_v4i32__7_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -552,18 +552,18 @@ define void @v_shuffle_v2i32_v4i32__7_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -596,17 +596,17 @@ define void @v_shuffle_v2i32_v4i32__7_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -639,17 +639,17 @@ define void @v_shuffle_v2i32_v4i32__7_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -682,17 +682,17 @@ define void @v_shuffle_v2i32_v4i32__7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -725,17 +725,17 @@ define void @v_shuffle_v2i32_v4i32__u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -767,17 +767,17 @@ define void @v_shuffle_v2i32_v4i32__0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> zeroinitializer store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -809,17 +809,17 @@ define void @v_shuffle_v2i32_v4i32__1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -851,17 +851,17 @@ define void @v_shuffle_v2i32_v4i32__2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -895,18 +895,18 @@ define void @v_shuffle_v2i32_v4i32__3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -938,17 +938,17 @@ define void @v_shuffle_v2i32_v4i32__4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -986,21 +986,21 @@ define void @v_shuffle_v2i32_v4i32__5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -1039,21 +1039,21 @@ define void @v_shuffle_v2i32_v4i32__6_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -1084,16 +1084,16 @@ define void @v_shuffle_v2i32_v4i32__u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1123,16 +1123,16 @@ define void @v_shuffle_v2i32_v4i32__0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1164,17 +1164,17 @@ define void @v_shuffle_v2i32_v4i32__1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1206,17 +1206,17 @@ define void @v_shuffle_v2i32_v4i32__2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1248,17 +1248,17 @@ define void @v_shuffle_v2i32_v4i32__3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1288,16 +1288,16 @@ define void @v_shuffle_v2i32_v4i32__4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1335,21 +1335,21 @@ define void @v_shuffle_v2i32_v4i32__5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -1388,21 +1388,21 @@ define void @v_shuffle_v2i32_v4i32__6_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -1434,17 +1434,17 @@ define void @v_shuffle_v2i32_v4i32__u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1476,17 +1476,17 @@ define void @v_shuffle_v2i32_v4i32__0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1518,18 +1518,18 @@ define void @v_shuffle_v2i32_v4i32__1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1561,17 +1561,17 @@ define void @v_shuffle_v2i32_v4i32__2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1603,17 +1603,17 @@ define void @v_shuffle_v2i32_v4i32__3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1644,17 +1644,17 @@ define void @v_shuffle_v2i32_v4i32__4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1692,21 +1692,21 @@ define void @v_shuffle_v2i32_v4i32__5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -1745,21 +1745,21 @@ define void @v_shuffle_v2i32_v4i32__6_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -1790,16 +1790,16 @@ define void @v_shuffle_v2i32_v4i32__u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1831,17 +1831,17 @@ define void @v_shuffle_v2i32_v4i32__0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1873,17 +1873,17 @@ define void @v_shuffle_v2i32_v4i32__1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1913,16 +1913,16 @@ define void @v_shuffle_v2i32_v4i32__2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1954,17 +1954,17 @@ define void @v_shuffle_v2i32_v4i32__3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1994,16 +1994,16 @@ define void @v_shuffle_v2i32_v4i32__4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2041,21 +2041,21 @@ define void @v_shuffle_v2i32_v4i32__5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -2094,21 +2094,21 @@ define void @v_shuffle_v2i32_v4i32__6_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -2150,16 +2150,16 @@ define void @v_shuffle_v2i32_v4i32__0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2190,17 +2190,17 @@ define void @v_shuffle_v2i32_v4i32__1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2230,16 +2230,16 @@ define void @v_shuffle_v2i32_v4i32__2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2271,17 +2271,17 @@ define void @v_shuffle_v2i32_v4i32__3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2324,17 +2324,17 @@ define void @v_shuffle_v2i32_v4i32__5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -2367,17 +2367,17 @@ define void @v_shuffle_v2i32_v4i32__6_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -2408,16 +2408,16 @@ define void @v_shuffle_v2i32_v4i32__u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -2456,21 +2456,21 @@ define void @v_shuffle_v2i32_v4i32__0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -2509,21 +2509,21 @@ define void @v_shuffle_v2i32_v4i32__1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -2562,21 +2562,21 @@ define void @v_shuffle_v2i32_v4i32__2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -2615,21 +2615,21 @@ define void @v_shuffle_v2i32_v4i32__3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -2660,16 +2660,16 @@ define void @v_shuffle_v2i32_v4i32__4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -2702,17 +2702,17 @@ define void @v_shuffle_v2i32_v4i32__5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -2745,17 +2745,17 @@ define void @v_shuffle_v2i32_v4i32__6_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -2787,17 +2787,17 @@ define void @v_shuffle_v2i32_v4i32__u_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -2836,21 +2836,21 @@ define void @v_shuffle_v2i32_v4i32__0_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -2889,21 +2889,21 @@ define void @v_shuffle_v2i32_v4i32__1_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -2942,21 +2942,21 @@ define void @v_shuffle_v2i32_v4i32__2_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -2995,21 +2995,21 @@ define void @v_shuffle_v2i32_v4i32__3_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[6:7] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[6:7] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -3042,17 +3042,17 @@ define void @v_shuffle_v2i32_v4i32__4_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -3085,18 +3085,18 @@ define void @v_shuffle_v2i32_v4i32__5_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -3129,17 +3129,17 @@ define void @v_shuffle_v2i32_v4i32__6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -3170,16 +3170,16 @@ define void @v_shuffle_v2i32_v4i32__u_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -3218,21 +3218,21 @@ define void @v_shuffle_v2i32_v4i32__0_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -3271,21 +3271,21 @@ define void @v_shuffle_v2i32_v4i32__1_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -3324,21 +3324,21 @@ define void @v_shuffle_v2i32_v4i32__2_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -3377,21 +3377,21 @@ define void @v_shuffle_v2i32_v4i32__3_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -3424,17 +3424,17 @@ define void @v_shuffle_v2i32_v4i32__4_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -3467,17 +3467,17 @@ define void @v_shuffle_v2i32_v4i32__5_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -3508,16 +3508,16 @@ define void @v_shuffle_v2i32_v4i32__6_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v4i32__6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v4i32__6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -3562,17 +3562,17 @@ define void @s_shuffle_v2i32_v4i32__0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -3604,17 +3604,17 @@ define void @s_shuffle_v2i32_v4i32__1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -3646,17 +3646,17 @@ define void @s_shuffle_v2i32_v4i32__2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -3688,17 +3688,17 @@ define void @s_shuffle_v2i32_v4i32__3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -3744,17 +3744,17 @@ define void @s_shuffle_v2i32_v4i32__5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -3787,17 +3787,17 @@ define void @s_shuffle_v2i32_v4i32__6_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -3830,17 +3830,17 @@ define void @s_shuffle_v2i32_v4i32__7_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -3881,21 +3881,21 @@ define void @s_shuffle_v2i32_v4i32__7_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -3934,20 +3934,20 @@ define void @s_shuffle_v2i32_v4i32__7_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -3988,21 +3988,21 @@ define void @s_shuffle_v2i32_v4i32__7_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -4043,21 +4043,21 @@ define void @s_shuffle_v2i32_v4i32__7_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -4092,18 +4092,18 @@ define void @s_shuffle_v2i32_v4i32__7_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -4157,18 +4157,18 @@ define void @s_shuffle_v2i32_v4i32__7_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -4203,18 +4203,18 @@ define void @s_shuffle_v2i32_v4i32__7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -4247,17 +4247,17 @@ define void @s_shuffle_v2i32_v4i32__u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -4309,18 +4309,18 @@ define void @s_shuffle_v2i32_v4i32__1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -4354,18 +4354,18 @@ define void @s_shuffle_v2i32_v4i32__2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s0 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s0 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -4399,18 +4399,18 @@ define void @s_shuffle_v2i32_v4i32__3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -4442,17 +4442,17 @@ define void @s_shuffle_v2i32_v4i32__4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -4492,21 +4492,21 @@ define void @s_shuffle_v2i32_v4i32__5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -4547,21 +4547,21 @@ define void @s_shuffle_v2i32_v4i32__6_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s0 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s0 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -4592,17 +4592,17 @@ define void @s_shuffle_v2i32_v4i32__u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -4632,17 +4632,17 @@ define void @s_shuffle_v2i32_v4i32__0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -4694,18 +4694,18 @@ define void @s_shuffle_v2i32_v4i32__2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -4753,17 +4753,17 @@ define void @s_shuffle_v2i32_v4i32__4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -4801,20 +4801,20 @@ define void @s_shuffle_v2i32_v4i32__5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -4855,21 +4855,21 @@ define void @s_shuffle_v2i32_v4i32__6_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -4902,17 +4902,17 @@ define void @s_shuffle_v2i32_v4i32__u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -4964,18 +4964,18 @@ define void @s_shuffle_v2i32_v4i32__1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -5009,18 +5009,18 @@ define void @s_shuffle_v2i32_v4i32__2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s2 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s2 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -5054,18 +5054,18 @@ define void @s_shuffle_v2i32_v4i32__3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -5097,17 +5097,17 @@ define void @s_shuffle_v2i32_v4i32__4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -5147,21 +5147,21 @@ define void @s_shuffle_v2i32_v4i32__5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -5202,21 +5202,21 @@ define void @s_shuffle_v2i32_v4i32__6_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s2 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s2 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -5249,17 +5249,17 @@ define void @s_shuffle_v2i32_v4i32__u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -5311,18 +5311,18 @@ define void @s_shuffle_v2i32_v4i32__1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -5354,17 +5354,17 @@ define void @s_shuffle_v2i32_v4i32__2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -5398,18 +5398,18 @@ define void @s_shuffle_v2i32_v4i32__3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -5441,17 +5441,17 @@ define void @s_shuffle_v2i32_v4i32__4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -5491,21 +5491,21 @@ define void @s_shuffle_v2i32_v4i32__5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -5546,21 +5546,21 @@ define void @s_shuffle_v2i32_v4i32__6_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -5605,17 +5605,17 @@ define void @s_shuffle_v2i32_v4i32__0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -5647,17 +5647,17 @@ define void @s_shuffle_v2i32_v4i32__1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -5689,17 +5689,17 @@ define void @s_shuffle_v2i32_v4i32__2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -5731,17 +5731,17 @@ define void @s_shuffle_v2i32_v4i32__3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -5789,18 +5789,18 @@ define void @s_shuffle_v2i32_v4i32__5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -5835,18 +5835,18 @@ define void @s_shuffle_v2i32_v4i32__6_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s0 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s0 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -5877,17 +5877,17 @@ define void @s_shuffle_v2i32_v4i32__u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -5926,20 +5926,20 @@ define void @s_shuffle_v2i32_v4i32__0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -5978,20 +5978,20 @@ define void @s_shuffle_v2i32_v4i32__1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -6032,21 +6032,21 @@ define void @s_shuffle_v2i32_v4i32__2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -6085,20 +6085,20 @@ define void @s_shuffle_v2i32_v4i32__3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -6129,17 +6129,17 @@ define void @s_shuffle_v2i32_v4i32__4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -6193,18 +6193,18 @@ define void @s_shuffle_v2i32_v4i32__6_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -6237,17 +6237,17 @@ define void @s_shuffle_v2i32_v4i32__u_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -6286,20 +6286,20 @@ define void @s_shuffle_v2i32_v4i32__0_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -6340,21 +6340,21 @@ define void @s_shuffle_v2i32_v4i32__1_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -6395,21 +6395,21 @@ define void @s_shuffle_v2i32_v4i32__2_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s6 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s6 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -6450,21 +6450,21 @@ define void @s_shuffle_v2i32_v4i32__3_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -6518,18 +6518,18 @@ define void @s_shuffle_v2i32_v4i32__5_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -6564,18 +6564,18 @@ define void @s_shuffle_v2i32_v4i32__6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s2 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s2 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -6608,17 +6608,17 @@ define void @s_shuffle_v2i32_v4i32__u_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -6657,20 +6657,20 @@ define void @s_shuffle_v2i32_v4i32__0_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -6711,21 +6711,21 @@ define void @s_shuffle_v2i32_v4i32__1_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -6766,21 +6766,21 @@ define void @s_shuffle_v2i32_v4i32__2_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -6821,21 +6821,21 @@ define void @s_shuffle_v2i32_v4i32__3_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -6889,18 +6889,18 @@ define void @s_shuffle_v2i32_v4i32__5_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> @@ -6933,17 +6933,17 @@ define void @s_shuffle_v2i32_v4i32__6_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v4i32__6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v4i32__6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <2 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v8i32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v8i32.ll index 83a51bc87eccf..94ee1774c2766 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v8i32.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v8i32.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v2i32_v8i32__u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v2i32_v8i32__0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -78,17 +78,17 @@ define void @v_shuffle_v2i32_v8i32__1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -118,16 +118,16 @@ define void @v_shuffle_v2i32_v8i32__2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -158,17 +158,17 @@ define void @v_shuffle_v2i32_v8i32__3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -198,16 +198,16 @@ define void @v_shuffle_v2i32_v8i32__4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -238,17 +238,17 @@ define void @v_shuffle_v2i32_v8i32__5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -278,16 +278,16 @@ define void @v_shuffle_v2i32_v8i32__6_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -319,17 +319,17 @@ define void @v_shuffle_v2i32_v8i32__7_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -371,17 +371,17 @@ define void @v_shuffle_v2i32_v8i32__9_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__9_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__9_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -412,16 +412,16 @@ define void @v_shuffle_v2i32_v8i32__10_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__10_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__10_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -453,17 +453,17 @@ define void @v_shuffle_v2i32_v8i32__11_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__11_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__11_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -494,16 +494,16 @@ define void @v_shuffle_v2i32_v8i32__12_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__12_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__12_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -535,17 +535,17 @@ define void @v_shuffle_v2i32_v8i32__13_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__13_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__13_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -576,16 +576,16 @@ define void @v_shuffle_v2i32_v8i32__14_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__14_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__14_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -618,17 +618,17 @@ define void @v_shuffle_v2i32_v8i32__15_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__15_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__15_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -668,21 +668,21 @@ define void @v_shuffle_v2i32_v8i32__15_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__15_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[8:9], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__15_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[8:9], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -721,21 +721,21 @@ define void @v_shuffle_v2i32_v8i32__15_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__15_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v9 -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__15_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v9 +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -774,21 +774,21 @@ define void @v_shuffle_v2i32_v8i32__15_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__15_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[10:11], v[2:3] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__15_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[10:11], v[2:3] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -827,21 +827,21 @@ define void @v_shuffle_v2i32_v8i32__15_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__15_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v11 -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__15_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v11 +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -880,21 +880,21 @@ define void @v_shuffle_v2i32_v8i32__15_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__15_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[12:13], v[4:5] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__15_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[12:13], v[4:5] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -933,21 +933,21 @@ define void @v_shuffle_v2i32_v8i32__15_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__15_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v13 -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__15_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v13 +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -986,21 +986,21 @@ define void @v_shuffle_v2i32_v8i32__15_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__15_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[14:15], v[6:7] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__15_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[14:15], v[6:7] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -1039,21 +1039,21 @@ define void @v_shuffle_v2i32_v8i32__15_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__15_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v15 -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__15_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v15 +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -1088,18 +1088,18 @@ define void @v_shuffle_v2i32_v8i32__15_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__15_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__15_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -1132,17 +1132,17 @@ define void @v_shuffle_v2i32_v8i32__15_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__15_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__15_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -1176,18 +1176,18 @@ define void @v_shuffle_v2i32_v8i32__15_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__15_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__15_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -1220,17 +1220,17 @@ define void @v_shuffle_v2i32_v8i32__15_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__15_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__15_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -1264,18 +1264,18 @@ define void @v_shuffle_v2i32_v8i32__15_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__15_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__15_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -1308,17 +1308,17 @@ define void @v_shuffle_v2i32_v8i32__15_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__15_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__15_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -1351,17 +1351,17 @@ define void @v_shuffle_v2i32_v8i32__15_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__15_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[6:7] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__15_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[6:7] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -1394,17 +1394,17 @@ define void @v_shuffle_v2i32_v8i32__15_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__15_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__15_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -1437,17 +1437,17 @@ define void @v_shuffle_v2i32_v8i32__u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1479,17 +1479,17 @@ define void @v_shuffle_v2i32_v8i32__0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> zeroinitializer store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1521,17 +1521,17 @@ define void @v_shuffle_v2i32_v8i32__1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1563,17 +1563,17 @@ define void @v_shuffle_v2i32_v8i32__2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1606,18 +1606,18 @@ define void @v_shuffle_v2i32_v8i32__3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1649,17 +1649,17 @@ define void @v_shuffle_v2i32_v8i32__4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1692,18 +1692,18 @@ define void @v_shuffle_v2i32_v8i32__5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1735,17 +1735,17 @@ define void @v_shuffle_v2i32_v8i32__6_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1779,18 +1779,18 @@ define void @v_shuffle_v2i32_v8i32__7_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1822,17 +1822,17 @@ define void @v_shuffle_v2i32_v8i32__8_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__8_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__8_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1870,21 +1870,21 @@ define void @v_shuffle_v2i32_v8i32__9_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__9_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__9_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -1923,21 +1923,21 @@ define void @v_shuffle_v2i32_v8i32__10_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__10_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__10_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -1976,21 +1976,21 @@ define void @v_shuffle_v2i32_v8i32__11_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__11_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__11_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -2029,21 +2029,21 @@ define void @v_shuffle_v2i32_v8i32__12_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__12_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v0 -; GFX940-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__12_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v0 +; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -2082,21 +2082,21 @@ define void @v_shuffle_v2i32_v8i32__13_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__13_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__13_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -2135,21 +2135,21 @@ define void @v_shuffle_v2i32_v8i32__14_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__14_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v9, v0 -; GFX940-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__14_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v9, v0 +; GFX942-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -2180,16 +2180,16 @@ define void @v_shuffle_v2i32_v8i32__u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2219,16 +2219,16 @@ define void @v_shuffle_v2i32_v8i32__0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2260,17 +2260,17 @@ define void @v_shuffle_v2i32_v8i32__1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2302,17 +2302,17 @@ define void @v_shuffle_v2i32_v8i32__2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2344,17 +2344,17 @@ define void @v_shuffle_v2i32_v8i32__3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2386,17 +2386,17 @@ define void @v_shuffle_v2i32_v8i32__4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2428,17 +2428,17 @@ define void @v_shuffle_v2i32_v8i32__5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2470,17 +2470,17 @@ define void @v_shuffle_v2i32_v8i32__6_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2512,17 +2512,17 @@ define void @v_shuffle_v2i32_v8i32__7_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2552,16 +2552,16 @@ define void @v_shuffle_v2i32_v8i32__8_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__8_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__8_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2599,21 +2599,21 @@ define void @v_shuffle_v2i32_v8i32__9_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__9_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__9_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -2652,21 +2652,21 @@ define void @v_shuffle_v2i32_v8i32__10_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__10_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__10_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -2705,21 +2705,21 @@ define void @v_shuffle_v2i32_v8i32__11_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__11_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__11_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -2758,21 +2758,21 @@ define void @v_shuffle_v2i32_v8i32__12_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__12_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__12_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -2811,21 +2811,21 @@ define void @v_shuffle_v2i32_v8i32__13_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__13_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__13_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -2864,21 +2864,21 @@ define void @v_shuffle_v2i32_v8i32__14_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__14_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v9, v1 -; GFX940-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__14_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v9, v1 +; GFX942-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -2910,17 +2910,17 @@ define void @v_shuffle_v2i32_v8i32__u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2952,17 +2952,17 @@ define void @v_shuffle_v2i32_v8i32__0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2994,18 +2994,18 @@ define void @v_shuffle_v2i32_v8i32__1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3037,17 +3037,17 @@ define void @v_shuffle_v2i32_v8i32__2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3079,17 +3079,17 @@ define void @v_shuffle_v2i32_v8i32__3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3121,17 +3121,17 @@ define void @v_shuffle_v2i32_v8i32__4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3164,18 +3164,18 @@ define void @v_shuffle_v2i32_v8i32__5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3207,17 +3207,17 @@ define void @v_shuffle_v2i32_v8i32__6_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3250,18 +3250,18 @@ define void @v_shuffle_v2i32_v8i32__7_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3292,17 +3292,17 @@ define void @v_shuffle_v2i32_v8i32__8_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__8_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__8_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3340,21 +3340,21 @@ define void @v_shuffle_v2i32_v8i32__9_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__9_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__9_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -3393,21 +3393,21 @@ define void @v_shuffle_v2i32_v8i32__10_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__10_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v2 -; GFX940-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__10_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v2 +; GFX942-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -3446,21 +3446,21 @@ define void @v_shuffle_v2i32_v8i32__11_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__11_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__11_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -3499,21 +3499,21 @@ define void @v_shuffle_v2i32_v8i32__12_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__12_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v9, v2 -; GFX940-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__12_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v9, v2 +; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -3552,21 +3552,21 @@ define void @v_shuffle_v2i32_v8i32__13_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__13_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[8:9], v[2:3] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__13_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[8:9], v[2:3] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -3605,21 +3605,21 @@ define void @v_shuffle_v2i32_v8i32__14_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__14_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v11, v2 -; GFX940-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__14_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v11, v2 +; GFX942-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -3650,16 +3650,16 @@ define void @v_shuffle_v2i32_v8i32__u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3691,17 +3691,17 @@ define void @v_shuffle_v2i32_v8i32__0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3733,17 +3733,17 @@ define void @v_shuffle_v2i32_v8i32__1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3773,16 +3773,16 @@ define void @v_shuffle_v2i32_v8i32__2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3814,17 +3814,17 @@ define void @v_shuffle_v2i32_v8i32__3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3856,17 +3856,17 @@ define void @v_shuffle_v2i32_v8i32__4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3898,17 +3898,17 @@ define void @v_shuffle_v2i32_v8i32__5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3940,17 +3940,17 @@ define void @v_shuffle_v2i32_v8i32__6_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3982,17 +3982,17 @@ define void @v_shuffle_v2i32_v8i32__7_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4022,16 +4022,16 @@ define void @v_shuffle_v2i32_v8i32__8_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__8_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__8_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4069,21 +4069,21 @@ define void @v_shuffle_v2i32_v8i32__9_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__9_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__9_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -4122,21 +4122,21 @@ define void @v_shuffle_v2i32_v8i32__10_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__10_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__10_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -4175,21 +4175,21 @@ define void @v_shuffle_v2i32_v8i32__11_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__11_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__11_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -4228,21 +4228,21 @@ define void @v_shuffle_v2i32_v8i32__12_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__12_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v9, v3 -; GFX940-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__12_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v9, v3 +; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -4281,21 +4281,21 @@ define void @v_shuffle_v2i32_v8i32__13_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__13_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v9 -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__13_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v9 +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -4334,21 +4334,21 @@ define void @v_shuffle_v2i32_v8i32__14_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__14_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v11, v3 -; GFX940-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__14_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v11, v3 +; GFX942-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -4380,17 +4380,17 @@ define void @v_shuffle_v2i32_v8i32__u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4422,17 +4422,17 @@ define void @v_shuffle_v2i32_v8i32__0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4465,18 +4465,18 @@ define void @v_shuffle_v2i32_v8i32__1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4508,17 +4508,17 @@ define void @v_shuffle_v2i32_v8i32__2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4550,18 +4550,18 @@ define void @v_shuffle_v2i32_v8i32__3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4593,17 +4593,17 @@ define void @v_shuffle_v2i32_v8i32__4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4635,17 +4635,17 @@ define void @v_shuffle_v2i32_v8i32__5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4677,17 +4677,17 @@ define void @v_shuffle_v2i32_v8i32__6_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4720,18 +4720,18 @@ define void @v_shuffle_v2i32_v8i32__7_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4762,17 +4762,17 @@ define void @v_shuffle_v2i32_v8i32__8_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__8_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__8_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4810,21 +4810,21 @@ define void @v_shuffle_v2i32_v8i32__9_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__9_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[4:5] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__9_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[4:5] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -4863,21 +4863,21 @@ define void @v_shuffle_v2i32_v8i32__10_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__10_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v9, v4 -; GFX940-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__10_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v9, v4 +; GFX942-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -4916,21 +4916,21 @@ define void @v_shuffle_v2i32_v8i32__11_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__11_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[8:9], v[4:5] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__11_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[8:9], v[4:5] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -4969,21 +4969,21 @@ define void @v_shuffle_v2i32_v8i32__12_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__12_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v11, v4 -; GFX940-NEXT: global_store_dwordx2 v14, v[10:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__12_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v11, v4 +; GFX942-NEXT: global_store_dwordx2 v14, v[10:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -5022,21 +5022,21 @@ define void @v_shuffle_v2i32_v8i32__13_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__13_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[10:11], v[4:5] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__13_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[10:11], v[4:5] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -5075,21 +5075,21 @@ define void @v_shuffle_v2i32_v8i32__14_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__14_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v13, v4 -; GFX940-NEXT: global_store_dwordx2 v14, v[12:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__14_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v13, v4 +; GFX942-NEXT: global_store_dwordx2 v14, v[12:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -5120,16 +5120,16 @@ define void @v_shuffle_v2i32_v8i32__u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5161,17 +5161,17 @@ define void @v_shuffle_v2i32_v8i32__0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5203,17 +5203,17 @@ define void @v_shuffle_v2i32_v8i32__1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5245,17 +5245,17 @@ define void @v_shuffle_v2i32_v8i32__2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5287,17 +5287,17 @@ define void @v_shuffle_v2i32_v8i32__3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5327,16 +5327,16 @@ define void @v_shuffle_v2i32_v8i32__4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5368,17 +5368,17 @@ define void @v_shuffle_v2i32_v8i32__5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5410,17 +5410,17 @@ define void @v_shuffle_v2i32_v8i32__6_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5452,17 +5452,17 @@ define void @v_shuffle_v2i32_v8i32__7_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5492,16 +5492,16 @@ define void @v_shuffle_v2i32_v8i32__8_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__8_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__8_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5539,21 +5539,21 @@ define void @v_shuffle_v2i32_v8i32__9_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__9_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__9_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -5592,21 +5592,21 @@ define void @v_shuffle_v2i32_v8i32__10_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__10_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v9, v5 -; GFX940-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__10_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v9, v5 +; GFX942-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -5645,21 +5645,21 @@ define void @v_shuffle_v2i32_v8i32__11_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__11_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v9 -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__11_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v9 +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -5698,21 +5698,21 @@ define void @v_shuffle_v2i32_v8i32__12_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__12_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v11, v5 -; GFX940-NEXT: global_store_dwordx2 v14, v[10:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__12_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v11, v5 +; GFX942-NEXT: global_store_dwordx2 v14, v[10:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -5751,21 +5751,21 @@ define void @v_shuffle_v2i32_v8i32__13_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__13_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v11 -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__13_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v11 +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -5804,21 +5804,21 @@ define void @v_shuffle_v2i32_v8i32__14_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__14_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v13, v5 -; GFX940-NEXT: global_store_dwordx2 v14, v[12:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__14_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v13, v5 +; GFX942-NEXT: global_store_dwordx2 v14, v[12:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -5850,17 +5850,17 @@ define void @v_shuffle_v2i32_v8i32__u_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5892,17 +5892,17 @@ define void @v_shuffle_v2i32_v8i32__0_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5935,18 +5935,18 @@ define void @v_shuffle_v2i32_v8i32__1_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5978,17 +5978,17 @@ define void @v_shuffle_v2i32_v8i32__2_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6021,18 +6021,18 @@ define void @v_shuffle_v2i32_v8i32__3_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6064,17 +6064,17 @@ define void @v_shuffle_v2i32_v8i32__4_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6106,18 +6106,18 @@ define void @v_shuffle_v2i32_v8i32__5_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6149,17 +6149,17 @@ define void @v_shuffle_v2i32_v8i32__6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6191,17 +6191,17 @@ define void @v_shuffle_v2i32_v8i32__7_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[6:7] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[6:7] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6232,17 +6232,17 @@ define void @v_shuffle_v2i32_v8i32__8_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__8_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__8_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6280,21 +6280,21 @@ define void @v_shuffle_v2i32_v8i32__9_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__9_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[8:9], v[6:7] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__9_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[8:9], v[6:7] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -6333,21 +6333,21 @@ define void @v_shuffle_v2i32_v8i32__10_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__10_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v11, v6 -; GFX940-NEXT: global_store_dwordx2 v16, v[10:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__10_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v11, v6 +; GFX942-NEXT: global_store_dwordx2 v16, v[10:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -6386,21 +6386,21 @@ define void @v_shuffle_v2i32_v8i32__11_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__11_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[10:11], v[6:7] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__11_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[10:11], v[6:7] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -6439,21 +6439,21 @@ define void @v_shuffle_v2i32_v8i32__12_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__12_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v13, v6 -; GFX940-NEXT: global_store_dwordx2 v16, v[12:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__12_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v13, v6 +; GFX942-NEXT: global_store_dwordx2 v16, v[12:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -6492,21 +6492,21 @@ define void @v_shuffle_v2i32_v8i32__13_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__13_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[12:13], v[6:7] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__13_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[12:13], v[6:7] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -6545,21 +6545,21 @@ define void @v_shuffle_v2i32_v8i32__14_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__14_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v15, v6 -; GFX940-NEXT: global_store_dwordx2 v16, v[14:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__14_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v15, v6 +; GFX942-NEXT: global_store_dwordx2 v16, v[14:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -6590,16 +6590,16 @@ define void @v_shuffle_v2i32_v8i32__u_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6631,17 +6631,17 @@ define void @v_shuffle_v2i32_v8i32__0_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6673,17 +6673,17 @@ define void @v_shuffle_v2i32_v8i32__1_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6715,17 +6715,17 @@ define void @v_shuffle_v2i32_v8i32__2_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6757,17 +6757,17 @@ define void @v_shuffle_v2i32_v8i32__3_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6799,17 +6799,17 @@ define void @v_shuffle_v2i32_v8i32__4_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6841,17 +6841,17 @@ define void @v_shuffle_v2i32_v8i32__5_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6881,16 +6881,16 @@ define void @v_shuffle_v2i32_v8i32__6_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6922,17 +6922,17 @@ define void @v_shuffle_v2i32_v8i32__7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6962,16 +6962,16 @@ define void @v_shuffle_v2i32_v8i32__8_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__8_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__8_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7009,21 +7009,21 @@ define void @v_shuffle_v2i32_v8i32__9_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__9_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v9 -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__9_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v9 +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -7062,21 +7062,21 @@ define void @v_shuffle_v2i32_v8i32__10_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__10_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v11, v7 -; GFX940-NEXT: global_store_dwordx2 v16, v[10:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__10_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v11, v7 +; GFX942-NEXT: global_store_dwordx2 v16, v[10:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -7115,21 +7115,21 @@ define void @v_shuffle_v2i32_v8i32__11_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__11_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v11 -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__11_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v11 +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -7168,21 +7168,21 @@ define void @v_shuffle_v2i32_v8i32__12_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__12_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v13, v7 -; GFX940-NEXT: global_store_dwordx2 v16, v[12:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__12_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v13, v7 +; GFX942-NEXT: global_store_dwordx2 v16, v[12:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -7221,21 +7221,21 @@ define void @v_shuffle_v2i32_v8i32__13_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__13_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v13 -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__13_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v13 +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -7274,21 +7274,21 @@ define void @v_shuffle_v2i32_v8i32__14_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__14_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v15, v7 -; GFX940-NEXT: global_store_dwordx2 v16, v[14:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__14_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v15, v7 +; GFX942-NEXT: global_store_dwordx2 v16, v[14:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -7330,16 +7330,16 @@ define void @v_shuffle_v2i32_v8i32__0_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__0_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__0_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7370,17 +7370,17 @@ define void @v_shuffle_v2i32_v8i32__1_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__1_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__1_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7410,16 +7410,16 @@ define void @v_shuffle_v2i32_v8i32__2_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__2_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__2_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7450,17 +7450,17 @@ define void @v_shuffle_v2i32_v8i32__3_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__3_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__3_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7490,16 +7490,16 @@ define void @v_shuffle_v2i32_v8i32__4_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__4_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__4_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7530,17 +7530,17 @@ define void @v_shuffle_v2i32_v8i32__5_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__5_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__5_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7570,16 +7570,16 @@ define void @v_shuffle_v2i32_v8i32__6_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__6_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__6_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7611,17 +7611,17 @@ define void @v_shuffle_v2i32_v8i32__7_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__7_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__7_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> store <2 x i32> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7664,17 +7664,17 @@ define void @v_shuffle_v2i32_v8i32__9_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__9_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__9_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -7707,17 +7707,17 @@ define void @v_shuffle_v2i32_v8i32__10_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__10_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__10_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -7751,18 +7751,18 @@ define void @v_shuffle_v2i32_v8i32__11_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__11_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__11_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -7795,17 +7795,17 @@ define void @v_shuffle_v2i32_v8i32__12_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__12_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__12_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -7839,18 +7839,18 @@ define void @v_shuffle_v2i32_v8i32__13_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__13_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__13_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -7883,17 +7883,17 @@ define void @v_shuffle_v2i32_v8i32__14_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__14_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__14_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -7924,16 +7924,16 @@ define void @v_shuffle_v2i32_v8i32__u_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__u_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__u_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -7972,21 +7972,21 @@ define void @v_shuffle_v2i32_v8i32__0_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__0_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__0_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -8025,21 +8025,21 @@ define void @v_shuffle_v2i32_v8i32__1_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__1_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__1_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -8078,21 +8078,21 @@ define void @v_shuffle_v2i32_v8i32__2_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__2_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__2_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -8131,21 +8131,21 @@ define void @v_shuffle_v2i32_v8i32__3_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__3_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__3_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -8184,21 +8184,21 @@ define void @v_shuffle_v2i32_v8i32__4_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__4_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__4_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -8237,21 +8237,21 @@ define void @v_shuffle_v2i32_v8i32__5_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__5_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v5 -; GFX940-NEXT: global_store_dwordx2 v14, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__5_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v5 +; GFX942-NEXT: global_store_dwordx2 v14, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -8290,21 +8290,21 @@ define void @v_shuffle_v2i32_v8i32__6_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__6_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__6_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -8343,21 +8343,21 @@ define void @v_shuffle_v2i32_v8i32__7_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__7_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v7 -; GFX940-NEXT: global_store_dwordx2 v16, v[8:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__7_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v7 +; GFX942-NEXT: global_store_dwordx2 v16, v[8:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -8388,16 +8388,16 @@ define void @v_shuffle_v2i32_v8i32__8_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__8_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__8_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -8430,17 +8430,17 @@ define void @v_shuffle_v2i32_v8i32__9_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__9_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__9_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -8473,17 +8473,17 @@ define void @v_shuffle_v2i32_v8i32__10_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__10_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__10_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -8516,17 +8516,17 @@ define void @v_shuffle_v2i32_v8i32__11_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__11_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__11_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -8559,17 +8559,17 @@ define void @v_shuffle_v2i32_v8i32__12_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__12_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__12_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -8602,17 +8602,17 @@ define void @v_shuffle_v2i32_v8i32__13_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__13_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__13_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -8645,17 +8645,17 @@ define void @v_shuffle_v2i32_v8i32__14_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__14_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__14_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -8687,17 +8687,17 @@ define void @v_shuffle_v2i32_v8i32__u_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__u_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__u_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -8736,21 +8736,21 @@ define void @v_shuffle_v2i32_v8i32__0_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__0_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__0_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -8789,21 +8789,21 @@ define void @v_shuffle_v2i32_v8i32__1_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__1_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__1_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -8842,21 +8842,21 @@ define void @v_shuffle_v2i32_v8i32__2_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__2_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v6 -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__2_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v6 +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -8895,21 +8895,21 @@ define void @v_shuffle_v2i32_v8i32__3_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__3_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[6:7] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__3_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[6:7] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -8948,21 +8948,21 @@ define void @v_shuffle_v2i32_v8i32__4_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__4_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v8 -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__4_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v8 +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -9001,21 +9001,21 @@ define void @v_shuffle_v2i32_v8i32__5_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__5_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[8:9] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__5_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[8:9] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -9054,21 +9054,21 @@ define void @v_shuffle_v2i32_v8i32__6_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__6_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v10 -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__6_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v10 +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -9107,21 +9107,21 @@ define void @v_shuffle_v2i32_v8i32__7_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__7_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[10:11] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__7_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[10:11] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -9154,17 +9154,17 @@ define void @v_shuffle_v2i32_v8i32__8_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__8_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__8_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -9197,18 +9197,18 @@ define void @v_shuffle_v2i32_v8i32__9_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__9_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__9_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -9241,17 +9241,17 @@ define void @v_shuffle_v2i32_v8i32__10_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__10_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__10_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -9284,17 +9284,17 @@ define void @v_shuffle_v2i32_v8i32__11_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__11_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__11_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -9327,17 +9327,17 @@ define void @v_shuffle_v2i32_v8i32__12_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__12_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__12_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -9371,18 +9371,18 @@ define void @v_shuffle_v2i32_v8i32__13_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__13_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__13_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -9415,17 +9415,17 @@ define void @v_shuffle_v2i32_v8i32__14_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__14_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__14_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -9456,16 +9456,16 @@ define void @v_shuffle_v2i32_v8i32__u_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__u_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__u_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -9504,21 +9504,21 @@ define void @v_shuffle_v2i32_v8i32__0_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__0_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__0_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -9557,21 +9557,21 @@ define void @v_shuffle_v2i32_v8i32__1_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__1_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__1_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -9610,21 +9610,21 @@ define void @v_shuffle_v2i32_v8i32__2_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__2_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__2_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -9663,21 +9663,21 @@ define void @v_shuffle_v2i32_v8i32__3_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__3_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__3_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -9716,21 +9716,21 @@ define void @v_shuffle_v2i32_v8i32__4_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__4_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v9 -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__4_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v9 +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -9769,21 +9769,21 @@ define void @v_shuffle_v2i32_v8i32__5_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__5_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v5 -; GFX940-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__5_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v5 +; GFX942-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -9822,21 +9822,21 @@ define void @v_shuffle_v2i32_v8i32__6_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__6_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v11 -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__6_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v11 +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -9875,21 +9875,21 @@ define void @v_shuffle_v2i32_v8i32__7_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__7_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v7 -; GFX940-NEXT: global_store_dwordx2 v16, v[10:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__7_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v7 +; GFX942-NEXT: global_store_dwordx2 v16, v[10:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -9922,17 +9922,17 @@ define void @v_shuffle_v2i32_v8i32__8_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__8_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__8_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -9965,17 +9965,17 @@ define void @v_shuffle_v2i32_v8i32__9_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__9_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__9_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -10006,16 +10006,16 @@ define void @v_shuffle_v2i32_v8i32__10_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__10_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__10_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -10048,17 +10048,17 @@ define void @v_shuffle_v2i32_v8i32__11_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__11_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__11_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -10091,17 +10091,17 @@ define void @v_shuffle_v2i32_v8i32__12_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__12_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__12_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -10134,17 +10134,17 @@ define void @v_shuffle_v2i32_v8i32__13_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__13_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__13_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -10177,17 +10177,17 @@ define void @v_shuffle_v2i32_v8i32__14_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__14_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__14_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -10219,17 +10219,17 @@ define void @v_shuffle_v2i32_v8i32__u_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__u_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__u_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -10268,21 +10268,21 @@ define void @v_shuffle_v2i32_v8i32__0_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__0_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__0_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -10321,21 +10321,21 @@ define void @v_shuffle_v2i32_v8i32__1_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__1_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[6:7] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__1_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[6:7] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -10374,21 +10374,21 @@ define void @v_shuffle_v2i32_v8i32__2_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__2_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v8 -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__2_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v8 +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -10427,21 +10427,21 @@ define void @v_shuffle_v2i32_v8i32__3_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__3_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[8:9] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__3_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[8:9] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -10480,21 +10480,21 @@ define void @v_shuffle_v2i32_v8i32__4_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__4_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v10 -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__4_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v10 +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -10533,21 +10533,21 @@ define void @v_shuffle_v2i32_v8i32__5_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__5_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[10:11] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__5_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[10:11] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -10586,21 +10586,21 @@ define void @v_shuffle_v2i32_v8i32__6_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__6_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v12 -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__6_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v12 +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -10639,21 +10639,21 @@ define void @v_shuffle_v2i32_v8i32__7_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__7_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[12:13] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__7_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[12:13] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -10686,17 +10686,17 @@ define void @v_shuffle_v2i32_v8i32__8_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__8_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__8_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -10730,18 +10730,18 @@ define void @v_shuffle_v2i32_v8i32__9_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__9_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__9_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -10774,17 +10774,17 @@ define void @v_shuffle_v2i32_v8i32__10_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__10_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__10_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -10817,18 +10817,18 @@ define void @v_shuffle_v2i32_v8i32__11_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__11_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__11_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -10861,17 +10861,17 @@ define void @v_shuffle_v2i32_v8i32__12_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__12_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__12_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -10904,17 +10904,17 @@ define void @v_shuffle_v2i32_v8i32__13_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__13_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__13_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -10947,17 +10947,17 @@ define void @v_shuffle_v2i32_v8i32__14_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__14_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__14_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -10988,16 +10988,16 @@ define void @v_shuffle_v2i32_v8i32__u_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__u_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__u_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -11036,21 +11036,21 @@ define void @v_shuffle_v2i32_v8i32__0_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__0_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__0_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -11089,21 +11089,21 @@ define void @v_shuffle_v2i32_v8i32__1_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__1_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v1 -; GFX940-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__1_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v1 +; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -11142,21 +11142,21 @@ define void @v_shuffle_v2i32_v8i32__2_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__2_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__2_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -11195,21 +11195,21 @@ define void @v_shuffle_v2i32_v8i32__3_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__3_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v3 -; GFX940-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__3_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v3 +; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -11248,21 +11248,21 @@ define void @v_shuffle_v2i32_v8i32__4_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__4_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v11 -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__4_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v11 +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -11301,21 +11301,21 @@ define void @v_shuffle_v2i32_v8i32__5_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__5_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v5 -; GFX940-NEXT: global_store_dwordx2 v14, v[10:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__5_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v5 +; GFX942-NEXT: global_store_dwordx2 v14, v[10:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -11354,21 +11354,21 @@ define void @v_shuffle_v2i32_v8i32__6_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__6_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v13 -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__6_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v13 +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -11407,21 +11407,21 @@ define void @v_shuffle_v2i32_v8i32__7_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__7_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v7 -; GFX940-NEXT: global_store_dwordx2 v16, v[12:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__7_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v7 +; GFX942-NEXT: global_store_dwordx2 v16, v[12:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -11454,17 +11454,17 @@ define void @v_shuffle_v2i32_v8i32__8_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__8_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__8_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -11497,17 +11497,17 @@ define void @v_shuffle_v2i32_v8i32__9_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__9_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__9_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -11540,17 +11540,17 @@ define void @v_shuffle_v2i32_v8i32__10_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__10_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__10_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -11583,17 +11583,17 @@ define void @v_shuffle_v2i32_v8i32__11_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__11_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__11_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -11624,16 +11624,16 @@ define void @v_shuffle_v2i32_v8i32__12_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__12_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__12_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -11666,17 +11666,17 @@ define void @v_shuffle_v2i32_v8i32__13_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__13_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__13_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -11709,17 +11709,17 @@ define void @v_shuffle_v2i32_v8i32__14_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__14_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__14_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -11751,17 +11751,17 @@ define void @v_shuffle_v2i32_v8i32__u_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__u_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__u_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -11800,21 +11800,21 @@ define void @v_shuffle_v2i32_v8i32__0_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__0_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v8 -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__0_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v8 +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -11853,21 +11853,21 @@ define void @v_shuffle_v2i32_v8i32__1_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__1_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[8:9] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__1_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[8:9] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -11906,21 +11906,21 @@ define void @v_shuffle_v2i32_v8i32__2_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__2_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v10 -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__2_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v10 +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -11959,21 +11959,21 @@ define void @v_shuffle_v2i32_v8i32__3_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__3_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[10:11] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__3_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[10:11] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -12012,21 +12012,21 @@ define void @v_shuffle_v2i32_v8i32__4_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__4_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v12 -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__4_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v12 +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -12065,21 +12065,21 @@ define void @v_shuffle_v2i32_v8i32__5_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__5_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[12:13] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__5_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[12:13] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -12118,21 +12118,21 @@ define void @v_shuffle_v2i32_v8i32__6_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__6_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v14 -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__6_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v14 +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -12171,21 +12171,21 @@ define void @v_shuffle_v2i32_v8i32__7_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__7_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[14:15] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__7_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[14:15] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -12218,17 +12218,17 @@ define void @v_shuffle_v2i32_v8i32__8_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__8_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__8_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -12262,18 +12262,18 @@ define void @v_shuffle_v2i32_v8i32__9_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__9_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__9_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -12306,17 +12306,17 @@ define void @v_shuffle_v2i32_v8i32__10_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__10_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__10_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -12350,18 +12350,18 @@ define void @v_shuffle_v2i32_v8i32__11_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__11_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__11_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -12394,17 +12394,17 @@ define void @v_shuffle_v2i32_v8i32__12_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__12_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__12_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -12437,18 +12437,18 @@ define void @v_shuffle_v2i32_v8i32__13_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__13_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__13_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -12481,17 +12481,17 @@ define void @v_shuffle_v2i32_v8i32__14_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__14_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__14_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -12522,16 +12522,16 @@ define void @v_shuffle_v2i32_v8i32__u_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__u_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__u_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -12570,21 +12570,21 @@ define void @v_shuffle_v2i32_v8i32__0_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__0_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__0_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -12623,21 +12623,21 @@ define void @v_shuffle_v2i32_v8i32__1_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__1_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v1 -; GFX940-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__1_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v1 +; GFX942-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -12676,21 +12676,21 @@ define void @v_shuffle_v2i32_v8i32__2_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__2_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__2_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -12729,21 +12729,21 @@ define void @v_shuffle_v2i32_v8i32__3_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__3_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v3 -; GFX940-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__3_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v3 +; GFX942-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -12782,21 +12782,21 @@ define void @v_shuffle_v2i32_v8i32__4_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__4_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v13 -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__4_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v13 +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -12835,21 +12835,21 @@ define void @v_shuffle_v2i32_v8i32__5_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__5_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v5 -; GFX940-NEXT: global_store_dwordx2 v14, v[12:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__5_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v5 +; GFX942-NEXT: global_store_dwordx2 v14, v[12:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -12888,21 +12888,21 @@ define void @v_shuffle_v2i32_v8i32__6_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__6_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v15 -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__6_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v15 +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -12941,21 +12941,21 @@ define void @v_shuffle_v2i32_v8i32__7_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__7_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v14, v7 -; GFX940-NEXT: global_store_dwordx2 v16, v[14:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__7_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v14, v7 +; GFX942-NEXT: global_store_dwordx2 v16, v[14:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -12988,17 +12988,17 @@ define void @v_shuffle_v2i32_v8i32__8_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__8_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__8_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -13031,17 +13031,17 @@ define void @v_shuffle_v2i32_v8i32__9_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__9_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__9_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -13074,17 +13074,17 @@ define void @v_shuffle_v2i32_v8i32__10_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__10_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__10_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -13117,17 +13117,17 @@ define void @v_shuffle_v2i32_v8i32__11_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__11_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__11_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -13160,17 +13160,17 @@ define void @v_shuffle_v2i32_v8i32__12_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__12_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__12_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -13203,17 +13203,17 @@ define void @v_shuffle_v2i32_v8i32__13_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__13_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__13_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -13244,16 +13244,16 @@ define void @v_shuffle_v2i32_v8i32__14_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i32_v8i32__14_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i32_v8i32__14_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=v"() %vec1 = call <8 x i32> asm "; def $0", "=v"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -13298,17 +13298,17 @@ define void @s_shuffle_v2i32_v8i32__0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -13340,17 +13340,17 @@ define void @s_shuffle_v2i32_v8i32__1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -13382,17 +13382,17 @@ define void @s_shuffle_v2i32_v8i32__2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -13424,17 +13424,17 @@ define void @s_shuffle_v2i32_v8i32__3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -13464,17 +13464,17 @@ define void @s_shuffle_v2i32_v8i32__4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -13506,17 +13506,17 @@ define void @s_shuffle_v2i32_v8i32__5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -13548,17 +13548,17 @@ define void @s_shuffle_v2i32_v8i32__6_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -13590,17 +13590,17 @@ define void @s_shuffle_v2i32_v8i32__7_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -13646,17 +13646,17 @@ define void @s_shuffle_v2i32_v8i32__9_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__9_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__9_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -13689,17 +13689,17 @@ define void @s_shuffle_v2i32_v8i32__10_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__10_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__10_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -13732,17 +13732,17 @@ define void @s_shuffle_v2i32_v8i32__11_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__11_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__11_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -13773,17 +13773,17 @@ define void @s_shuffle_v2i32_v8i32__12_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__12_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__12_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -13816,17 +13816,17 @@ define void @s_shuffle_v2i32_v8i32__13_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__13_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__13_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -13859,17 +13859,17 @@ define void @s_shuffle_v2i32_v8i32__14_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__14_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__14_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -13902,17 +13902,17 @@ define void @s_shuffle_v2i32_v8i32__15_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__15_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__15_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -13953,22 +13953,22 @@ define void @s_shuffle_v2i32_v8i32__15_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__15_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__15_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -14007,20 +14007,20 @@ define void @s_shuffle_v2i32_v8i32__15_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__15_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__15_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -14061,22 +14061,22 @@ define void @s_shuffle_v2i32_v8i32__15_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__15_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__15_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -14117,22 +14117,22 @@ define void @s_shuffle_v2i32_v8i32__15_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__15_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s11 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__15_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -14173,21 +14173,21 @@ define void @s_shuffle_v2i32_v8i32__15_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__15_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s15 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__15_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s15 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -14226,21 +14226,21 @@ define void @s_shuffle_v2i32_v8i32__15_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__15_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__15_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -14281,21 +14281,21 @@ define void @s_shuffle_v2i32_v8i32__15_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__15_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s15 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__15_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s15 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -14336,21 +14336,21 @@ define void @s_shuffle_v2i32_v8i32__15_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__15_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s15 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__15_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -14385,18 +14385,18 @@ define void @s_shuffle_v2i32_v8i32__15_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__15_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__15_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -14450,18 +14450,18 @@ define void @s_shuffle_v2i32_v8i32__15_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__15_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__15_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -14496,18 +14496,18 @@ define void @s_shuffle_v2i32_v8i32__15_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__15_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__15_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -14542,18 +14542,18 @@ define void @s_shuffle_v2i32_v8i32__15_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__15_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__15_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -14607,18 +14607,18 @@ define void @s_shuffle_v2i32_v8i32__15_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__15_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__15_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -14653,18 +14653,18 @@ define void @s_shuffle_v2i32_v8i32__15_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__15_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__15_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -14697,17 +14697,17 @@ define void @s_shuffle_v2i32_v8i32__u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -14759,18 +14759,18 @@ define void @s_shuffle_v2i32_v8i32__1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -14804,18 +14804,18 @@ define void @s_shuffle_v2i32_v8i32__2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s0 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s0 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -14849,18 +14849,18 @@ define void @s_shuffle_v2i32_v8i32__3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -14912,18 +14912,18 @@ define void @s_shuffle_v2i32_v8i32__5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -14957,18 +14957,18 @@ define void @s_shuffle_v2i32_v8i32__6_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s0 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s0 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -15002,18 +15002,18 @@ define void @s_shuffle_v2i32_v8i32__7_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -15045,17 +15045,17 @@ define void @s_shuffle_v2i32_v8i32__8_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__8_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__8_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -15095,22 +15095,22 @@ define void @s_shuffle_v2i32_v8i32__9_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__9_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__9_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -15151,22 +15151,22 @@ define void @s_shuffle_v2i32_v8i32__10_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__10_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s0 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__10_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s0 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -15207,22 +15207,22 @@ define void @s_shuffle_v2i32_v8i32__11_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__11_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__11_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -15261,21 +15261,21 @@ define void @s_shuffle_v2i32_v8i32__12_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__12_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__12_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -15316,22 +15316,22 @@ define void @s_shuffle_v2i32_v8i32__13_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__13_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s9 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__13_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s9 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -15372,22 +15372,22 @@ define void @s_shuffle_v2i32_v8i32__14_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__14_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: s_mov_b64 s[8:9], s[10:11] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__14_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -15418,17 +15418,17 @@ define void @s_shuffle_v2i32_v8i32__u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -15458,17 +15458,17 @@ define void @s_shuffle_v2i32_v8i32__0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -15520,18 +15520,18 @@ define void @s_shuffle_v2i32_v8i32__2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -15619,18 +15619,18 @@ define void @s_shuffle_v2i32_v8i32__6_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -15678,17 +15678,17 @@ define void @s_shuffle_v2i32_v8i32__8_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__8_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__8_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -15726,20 +15726,20 @@ define void @s_shuffle_v2i32_v8i32__9_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__9_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__9_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -15780,22 +15780,22 @@ define void @s_shuffle_v2i32_v8i32__10_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__10_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__10_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -15834,20 +15834,20 @@ define void @s_shuffle_v2i32_v8i32__11_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__11_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__11_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -15886,21 +15886,21 @@ define void @s_shuffle_v2i32_v8i32__12_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__12_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__12_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -15939,20 +15939,20 @@ define void @s_shuffle_v2i32_v8i32__13_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__13_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__13_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -15993,22 +15993,22 @@ define void @s_shuffle_v2i32_v8i32__14_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__14_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[10:11] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__14_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -16041,17 +16041,17 @@ define void @s_shuffle_v2i32_v8i32__u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -16103,18 +16103,18 @@ define void @s_shuffle_v2i32_v8i32__1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -16148,18 +16148,18 @@ define void @s_shuffle_v2i32_v8i32__2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s2 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s2 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -16193,18 +16193,18 @@ define void @s_shuffle_v2i32_v8i32__3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -16256,18 +16256,18 @@ define void @s_shuffle_v2i32_v8i32__5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -16301,18 +16301,18 @@ define void @s_shuffle_v2i32_v8i32__6_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s2 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s2 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -16346,18 +16346,18 @@ define void @s_shuffle_v2i32_v8i32__7_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -16389,17 +16389,17 @@ define void @s_shuffle_v2i32_v8i32__8_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__8_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__8_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -16439,22 +16439,22 @@ define void @s_shuffle_v2i32_v8i32__9_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__9_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__9_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -16495,22 +16495,22 @@ define void @s_shuffle_v2i32_v8i32__10_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__10_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s2 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__10_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s2 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -16551,22 +16551,22 @@ define void @s_shuffle_v2i32_v8i32__11_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__11_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__11_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -16605,21 +16605,21 @@ define void @s_shuffle_v2i32_v8i32__12_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__12_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__12_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -16660,22 +16660,22 @@ define void @s_shuffle_v2i32_v8i32__13_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__13_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s9 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__13_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s9 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -16716,22 +16716,22 @@ define void @s_shuffle_v2i32_v8i32__14_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__14_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: s_mov_b64 s[8:9], s[10:11] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__14_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -16764,17 +16764,17 @@ define void @s_shuffle_v2i32_v8i32__u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -16826,18 +16826,18 @@ define void @s_shuffle_v2i32_v8i32__1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -16869,17 +16869,17 @@ define void @s_shuffle_v2i32_v8i32__2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -16913,18 +16913,18 @@ define void @s_shuffle_v2i32_v8i32__3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -16976,18 +16976,18 @@ define void @s_shuffle_v2i32_v8i32__5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -17021,18 +17021,18 @@ define void @s_shuffle_v2i32_v8i32__6_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -17066,18 +17066,18 @@ define void @s_shuffle_v2i32_v8i32__7_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -17109,17 +17109,17 @@ define void @s_shuffle_v2i32_v8i32__8_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__8_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__8_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -17159,22 +17159,22 @@ define void @s_shuffle_v2i32_v8i32__9_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__9_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__9_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -17215,22 +17215,22 @@ define void @s_shuffle_v2i32_v8i32__10_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__10_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__10_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -17271,22 +17271,22 @@ define void @s_shuffle_v2i32_v8i32__11_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__11_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__11_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -17325,21 +17325,21 @@ define void @s_shuffle_v2i32_v8i32__12_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__12_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__12_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -17380,22 +17380,22 @@ define void @s_shuffle_v2i32_v8i32__13_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__13_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s9 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__13_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s9 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -17436,22 +17436,22 @@ define void @s_shuffle_v2i32_v8i32__14_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__14_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[10:11] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__14_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -17484,17 +17484,17 @@ define void @s_shuffle_v2i32_v8i32__u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -17546,18 +17546,18 @@ define void @s_shuffle_v2i32_v8i32__1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -17591,18 +17591,18 @@ define void @s_shuffle_v2i32_v8i32__2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s4 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s4 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -17636,18 +17636,18 @@ define void @s_shuffle_v2i32_v8i32__3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -17699,18 +17699,18 @@ define void @s_shuffle_v2i32_v8i32__5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -17744,18 +17744,18 @@ define void @s_shuffle_v2i32_v8i32__6_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s4 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s4 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -17789,18 +17789,18 @@ define void @s_shuffle_v2i32_v8i32__7_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -17832,17 +17832,17 @@ define void @s_shuffle_v2i32_v8i32__8_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__8_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__8_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -17882,21 +17882,21 @@ define void @s_shuffle_v2i32_v8i32__9_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__9_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s9 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__9_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s9 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -17937,21 +17937,21 @@ define void @s_shuffle_v2i32_v8i32__10_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__10_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s4 -; GFX940-NEXT: s_mov_b64 s[8:9], s[10:11] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__10_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s4 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -17992,21 +17992,21 @@ define void @s_shuffle_v2i32_v8i32__11_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__11_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__11_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -18045,21 +18045,21 @@ define void @s_shuffle_v2i32_v8i32__12_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__12_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s12 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__12_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s12 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -18100,21 +18100,21 @@ define void @s_shuffle_v2i32_v8i32__13_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__13_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s13 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__13_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s13 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -18155,21 +18155,21 @@ define void @s_shuffle_v2i32_v8i32__14_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__14_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s15, s4 -; GFX940-NEXT: s_mov_b64 s[8:9], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__14_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s15, s4 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -18200,17 +18200,17 @@ define void @s_shuffle_v2i32_v8i32__u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -18280,18 +18280,18 @@ define void @s_shuffle_v2i32_v8i32__2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -18339,17 +18339,17 @@ define void @s_shuffle_v2i32_v8i32__4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -18401,18 +18401,18 @@ define void @s_shuffle_v2i32_v8i32__6_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -18460,17 +18460,17 @@ define void @s_shuffle_v2i32_v8i32__8_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__8_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__8_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -18508,21 +18508,21 @@ define void @s_shuffle_v2i32_v8i32__9_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__9_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__9_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -18563,21 +18563,21 @@ define void @s_shuffle_v2i32_v8i32__10_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__10_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[10:11] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__10_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -18616,21 +18616,21 @@ define void @s_shuffle_v2i32_v8i32__11_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__11_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__11_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -18669,21 +18669,21 @@ define void @s_shuffle_v2i32_v8i32__12_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__12_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__12_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -18722,21 +18722,21 @@ define void @s_shuffle_v2i32_v8i32__13_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__13_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__13_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -18777,21 +18777,21 @@ define void @s_shuffle_v2i32_v8i32__14_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__14_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__14_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -18824,17 +18824,17 @@ define void @s_shuffle_v2i32_v8i32__u_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -18886,18 +18886,18 @@ define void @s_shuffle_v2i32_v8i32__1_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -18931,18 +18931,18 @@ define void @s_shuffle_v2i32_v8i32__2_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s6 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s6 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -18976,18 +18976,18 @@ define void @s_shuffle_v2i32_v8i32__3_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -19039,18 +19039,18 @@ define void @s_shuffle_v2i32_v8i32__5_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -19084,18 +19084,18 @@ define void @s_shuffle_v2i32_v8i32__6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s6 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s6 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -19129,18 +19129,18 @@ define void @s_shuffle_v2i32_v8i32__7_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -19172,17 +19172,17 @@ define void @s_shuffle_v2i32_v8i32__8_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__8_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__8_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -19222,21 +19222,21 @@ define void @s_shuffle_v2i32_v8i32__9_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__9_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s9 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__9_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s9 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -19277,21 +19277,21 @@ define void @s_shuffle_v2i32_v8i32__10_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__10_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s6 -; GFX940-NEXT: s_mov_b64 s[8:9], s[10:11] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__10_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s6 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -19332,21 +19332,21 @@ define void @s_shuffle_v2i32_v8i32__11_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__11_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__11_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -19385,21 +19385,21 @@ define void @s_shuffle_v2i32_v8i32__12_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__12_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s14 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__12_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s14 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -19440,21 +19440,21 @@ define void @s_shuffle_v2i32_v8i32__13_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__13_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s13 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__13_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s13 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -19495,21 +19495,21 @@ define void @s_shuffle_v2i32_v8i32__14_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__14_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s15, s6 -; GFX940-NEXT: s_mov_b64 s[8:9], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__14_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s15, s6 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -19542,17 +19542,17 @@ define void @s_shuffle_v2i32_v8i32__u_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -19604,18 +19604,18 @@ define void @s_shuffle_v2i32_v8i32__1_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -19649,18 +19649,18 @@ define void @s_shuffle_v2i32_v8i32__2_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -19694,18 +19694,18 @@ define void @s_shuffle_v2i32_v8i32__3_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -19757,18 +19757,18 @@ define void @s_shuffle_v2i32_v8i32__5_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -19800,17 +19800,17 @@ define void @s_shuffle_v2i32_v8i32__6_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -19844,18 +19844,18 @@ define void @s_shuffle_v2i32_v8i32__7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -19887,17 +19887,17 @@ define void @s_shuffle_v2i32_v8i32__8_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__8_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__8_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -19937,21 +19937,21 @@ define void @s_shuffle_v2i32_v8i32__9_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__9_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s9 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__9_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s9 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -19992,21 +19992,21 @@ define void @s_shuffle_v2i32_v8i32__10_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__10_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[10:11] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__10_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -20047,21 +20047,21 @@ define void @s_shuffle_v2i32_v8i32__11_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__11_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s11 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__11_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -20100,21 +20100,21 @@ define void @s_shuffle_v2i32_v8i32__12_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__12_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__12_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -20155,21 +20155,21 @@ define void @s_shuffle_v2i32_v8i32__13_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__13_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s13 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__13_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -20210,21 +20210,21 @@ define void @s_shuffle_v2i32_v8i32__14_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__14_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s15, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__14_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -20269,17 +20269,17 @@ define void @s_shuffle_v2i32_v8i32__0_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__0_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__0_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -20311,17 +20311,17 @@ define void @s_shuffle_v2i32_v8i32__1_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__1_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__1_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -20353,17 +20353,17 @@ define void @s_shuffle_v2i32_v8i32__2_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__2_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__2_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -20395,17 +20395,17 @@ define void @s_shuffle_v2i32_v8i32__3_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__3_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__3_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -20435,17 +20435,17 @@ define void @s_shuffle_v2i32_v8i32__4_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__4_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__4_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -20477,17 +20477,17 @@ define void @s_shuffle_v2i32_v8i32__5_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__5_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__5_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -20519,17 +20519,17 @@ define void @s_shuffle_v2i32_v8i32__6_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__6_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__6_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -20561,17 +20561,17 @@ define void @s_shuffle_v2i32_v8i32__7_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__7_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__7_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x i32> %shuf) @@ -20619,18 +20619,18 @@ define void @s_shuffle_v2i32_v8i32__9_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__9_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__9_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -20665,18 +20665,18 @@ define void @s_shuffle_v2i32_v8i32__10_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__10_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s0 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__10_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s0 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -20711,18 +20711,18 @@ define void @s_shuffle_v2i32_v8i32__11_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__11_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__11_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -20776,18 +20776,18 @@ define void @s_shuffle_v2i32_v8i32__13_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__13_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__13_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -20822,18 +20822,18 @@ define void @s_shuffle_v2i32_v8i32__14_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__14_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s0 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__14_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s0 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -20864,17 +20864,17 @@ define void @s_shuffle_v2i32_v8i32__u_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__u_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__u_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -20913,20 +20913,20 @@ define void @s_shuffle_v2i32_v8i32__0_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__0_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__0_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -20965,20 +20965,20 @@ define void @s_shuffle_v2i32_v8i32__1_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__1_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__1_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -21019,22 +21019,22 @@ define void @s_shuffle_v2i32_v8i32__2_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__2_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__2_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -21073,20 +21073,20 @@ define void @s_shuffle_v2i32_v8i32__3_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__3_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__3_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -21125,21 +21125,21 @@ define void @s_shuffle_v2i32_v8i32__4_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__4_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__4_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -21178,20 +21178,20 @@ define void @s_shuffle_v2i32_v8i32__5_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__5_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__5_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -21232,21 +21232,21 @@ define void @s_shuffle_v2i32_v8i32__6_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__6_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s9 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__6_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s9 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -21285,20 +21285,20 @@ define void @s_shuffle_v2i32_v8i32__7_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__7_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__7_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -21329,17 +21329,17 @@ define void @s_shuffle_v2i32_v8i32__8_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__8_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__8_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -21393,18 +21393,18 @@ define void @s_shuffle_v2i32_v8i32__10_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__10_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__10_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -21496,18 +21496,18 @@ define void @s_shuffle_v2i32_v8i32__14_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__14_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__14_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -21540,17 +21540,17 @@ define void @s_shuffle_v2i32_v8i32__u_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__u_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__u_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -21589,20 +21589,20 @@ define void @s_shuffle_v2i32_v8i32__0_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__0_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__0_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -21643,22 +21643,22 @@ define void @s_shuffle_v2i32_v8i32__1_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__1_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__1_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -21699,22 +21699,22 @@ define void @s_shuffle_v2i32_v8i32__2_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__2_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s6 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__2_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s6 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -21755,22 +21755,22 @@ define void @s_shuffle_v2i32_v8i32__3_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__3_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__3_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -21809,21 +21809,21 @@ define void @s_shuffle_v2i32_v8i32__4_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__4_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__4_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -21864,21 +21864,21 @@ define void @s_shuffle_v2i32_v8i32__5_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__5_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__5_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -21919,21 +21919,21 @@ define void @s_shuffle_v2i32_v8i32__6_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__6_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s10 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__6_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s10 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -21974,21 +21974,21 @@ define void @s_shuffle_v2i32_v8i32__7_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__7_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__7_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -22042,18 +22042,18 @@ define void @s_shuffle_v2i32_v8i32__9_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__9_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__9_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -22088,18 +22088,18 @@ define void @s_shuffle_v2i32_v8i32__10_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__10_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s2 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__10_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s2 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -22134,18 +22134,18 @@ define void @s_shuffle_v2i32_v8i32__11_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__11_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__11_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -22199,18 +22199,18 @@ define void @s_shuffle_v2i32_v8i32__13_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__13_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__13_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -22245,18 +22245,18 @@ define void @s_shuffle_v2i32_v8i32__14_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__14_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s2 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__14_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s2 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -22289,17 +22289,17 @@ define void @s_shuffle_v2i32_v8i32__u_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__u_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__u_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -22338,20 +22338,20 @@ define void @s_shuffle_v2i32_v8i32__0_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__0_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__0_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -22392,22 +22392,22 @@ define void @s_shuffle_v2i32_v8i32__1_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__1_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__1_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -22448,22 +22448,22 @@ define void @s_shuffle_v2i32_v8i32__2_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__2_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__2_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -22504,22 +22504,22 @@ define void @s_shuffle_v2i32_v8i32__3_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__3_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__3_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -22558,21 +22558,21 @@ define void @s_shuffle_v2i32_v8i32__4_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__4_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__4_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -22613,21 +22613,21 @@ define void @s_shuffle_v2i32_v8i32__5_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__5_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[10:11] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__5_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -22668,21 +22668,21 @@ define void @s_shuffle_v2i32_v8i32__6_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__6_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s11 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__6_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -22723,21 +22723,21 @@ define void @s_shuffle_v2i32_v8i32__7_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__7_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[10:11] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__7_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -22791,18 +22791,18 @@ define void @s_shuffle_v2i32_v8i32__9_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__9_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__9_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -22835,17 +22835,17 @@ define void @s_shuffle_v2i32_v8i32__10_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__10_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__10_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -22880,18 +22880,18 @@ define void @s_shuffle_v2i32_v8i32__11_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__11_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__11_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -22945,18 +22945,18 @@ define void @s_shuffle_v2i32_v8i32__13_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__13_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__13_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -22991,18 +22991,18 @@ define void @s_shuffle_v2i32_v8i32__14_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__14_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__14_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -23035,17 +23035,17 @@ define void @s_shuffle_v2i32_v8i32__u_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__u_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__u_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -23084,20 +23084,20 @@ define void @s_shuffle_v2i32_v8i32__0_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__0_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__0_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -23138,21 +23138,21 @@ define void @s_shuffle_v2i32_v8i32__1_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__1_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s12 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__1_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s12 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -23193,22 +23193,22 @@ define void @s_shuffle_v2i32_v8i32__2_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__2_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s8 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__2_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s8 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -23249,21 +23249,21 @@ define void @s_shuffle_v2i32_v8i32__3_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__3_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s12 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__3_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s12 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -23302,21 +23302,21 @@ define void @s_shuffle_v2i32_v8i32__4_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__4_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__4_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -23357,21 +23357,21 @@ define void @s_shuffle_v2i32_v8i32__5_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__5_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s12 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__5_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s12 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -23412,21 +23412,21 @@ define void @s_shuffle_v2i32_v8i32__6_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__6_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s12 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__6_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s12 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -23467,21 +23467,21 @@ define void @s_shuffle_v2i32_v8i32__7_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__7_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s12 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__7_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s12 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -23535,18 +23535,18 @@ define void @s_shuffle_v2i32_v8i32__9_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__9_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__9_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -23581,18 +23581,18 @@ define void @s_shuffle_v2i32_v8i32__10_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__10_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s4 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__10_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s4 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -23627,18 +23627,18 @@ define void @s_shuffle_v2i32_v8i32__11_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__11_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__11_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -23692,18 +23692,18 @@ define void @s_shuffle_v2i32_v8i32__13_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__13_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__13_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -23738,18 +23738,18 @@ define void @s_shuffle_v2i32_v8i32__14_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__14_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s4 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__14_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s4 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -23780,17 +23780,17 @@ define void @s_shuffle_v2i32_v8i32__u_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__u_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__u_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -23829,20 +23829,20 @@ define void @s_shuffle_v2i32_v8i32__0_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__0_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__0_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -23881,21 +23881,21 @@ define void @s_shuffle_v2i32_v8i32__1_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__1_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__1_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -23936,22 +23936,22 @@ define void @s_shuffle_v2i32_v8i32__2_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__2_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s9 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__2_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s9 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -23990,21 +23990,21 @@ define void @s_shuffle_v2i32_v8i32__3_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__3_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__3_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -24043,21 +24043,21 @@ define void @s_shuffle_v2i32_v8i32__4_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__4_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__4_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -24096,21 +24096,21 @@ define void @s_shuffle_v2i32_v8i32__5_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__5_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__5_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -24151,21 +24151,21 @@ define void @s_shuffle_v2i32_v8i32__6_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__6_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s13 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__6_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -24204,21 +24204,21 @@ define void @s_shuffle_v2i32_v8i32__7_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__7_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__7_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -24291,18 +24291,18 @@ define void @s_shuffle_v2i32_v8i32__10_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__10_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__10_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -24352,17 +24352,17 @@ define void @s_shuffle_v2i32_v8i32__12_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__12_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__12_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -24416,18 +24416,18 @@ define void @s_shuffle_v2i32_v8i32__14_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__14_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__14_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -24460,17 +24460,17 @@ define void @s_shuffle_v2i32_v8i32__u_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__u_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__u_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -24509,20 +24509,20 @@ define void @s_shuffle_v2i32_v8i32__0_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__0_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__0_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -24563,22 +24563,22 @@ define void @s_shuffle_v2i32_v8i32__1_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__1_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__1_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -24619,22 +24619,22 @@ define void @s_shuffle_v2i32_v8i32__2_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__2_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s10 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__2_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s10 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -24675,22 +24675,22 @@ define void @s_shuffle_v2i32_v8i32__3_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__3_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__3_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -24729,21 +24729,21 @@ define void @s_shuffle_v2i32_v8i32__4_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__4_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__4_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -24784,21 +24784,21 @@ define void @s_shuffle_v2i32_v8i32__5_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__5_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s14 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__5_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s14 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -24839,21 +24839,21 @@ define void @s_shuffle_v2i32_v8i32__6_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__6_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s14 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__6_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s14 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -24894,21 +24894,21 @@ define void @s_shuffle_v2i32_v8i32__7_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__7_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s14 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__7_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s14 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -24962,18 +24962,18 @@ define void @s_shuffle_v2i32_v8i32__9_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__9_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__9_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -25008,18 +25008,18 @@ define void @s_shuffle_v2i32_v8i32__10_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__10_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s6 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__10_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s6 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -25054,18 +25054,18 @@ define void @s_shuffle_v2i32_v8i32__11_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__11_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__11_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -25119,18 +25119,18 @@ define void @s_shuffle_v2i32_v8i32__13_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__13_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__13_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -25165,18 +25165,18 @@ define void @s_shuffle_v2i32_v8i32__14_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__14_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s6 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__14_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s6 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -25209,17 +25209,17 @@ define void @s_shuffle_v2i32_v8i32__u_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__u_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__u_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -25258,20 +25258,20 @@ define void @s_shuffle_v2i32_v8i32__0_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__0_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__0_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -25312,22 +25312,22 @@ define void @s_shuffle_v2i32_v8i32__1_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__1_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[10:11] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__1_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -25368,22 +25368,22 @@ define void @s_shuffle_v2i32_v8i32__2_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__2_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s11 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__2_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -25424,22 +25424,22 @@ define void @s_shuffle_v2i32_v8i32__3_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__3_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[10:11] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__3_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -25478,21 +25478,21 @@ define void @s_shuffle_v2i32_v8i32__4_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__4_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__4_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -25533,21 +25533,21 @@ define void @s_shuffle_v2i32_v8i32__5_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__5_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__5_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -25588,21 +25588,21 @@ define void @s_shuffle_v2i32_v8i32__6_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__6_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s15 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__6_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -25643,21 +25643,21 @@ define void @s_shuffle_v2i32_v8i32__7_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__7_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__7_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -25711,18 +25711,18 @@ define void @s_shuffle_v2i32_v8i32__9_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__9_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__9_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -25757,18 +25757,18 @@ define void @s_shuffle_v2i32_v8i32__10_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__10_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__10_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -25803,18 +25803,18 @@ define void @s_shuffle_v2i32_v8i32__11_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__11_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__11_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -25868,18 +25868,18 @@ define void @s_shuffle_v2i32_v8i32__13_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__13_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__13_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> @@ -25912,17 +25912,17 @@ define void @s_shuffle_v2i32_v8i32__14_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i32_v8i32__14_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i32_v8i32__14_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i32> asm "; def $0", "=s"() %vec1 = call <8 x i32> asm "; def $0", "=s"() %shuf = shufflevector <8 x i32> %vec0, <8 x i32> %vec1, <2 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v2i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v2i64.ll index 8198a5d486f23..51dc9a51ec9d0 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v2i64.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v2i64.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v2i64_v2i64__u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v2i64_v2i64__0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v2i64__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v2i64__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -81,18 +81,18 @@ define void @v_shuffle_v2i64_v2i64__1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v2i64__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v2i64__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -137,18 +137,18 @@ define void @v_shuffle_v2i64_v2i64__3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v2i64__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v2i64__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <2 x i32> @@ -193,24 +193,24 @@ define void @v_shuffle_v2i64_v2i64__3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v2i64__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v2i64__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <2 x i32> @@ -251,22 +251,22 @@ define void @v_shuffle_v2i64_v2i64__3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v2i64__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v2i64__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <2 x i32> @@ -301,19 +301,19 @@ define void @v_shuffle_v2i64_v2i64__3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v2i64__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v2i64__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <2 x i32> @@ -348,18 +348,18 @@ define void @v_shuffle_v2i64_v2i64__3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v2i64__3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v2i64__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <2 x i32> @@ -394,18 +394,18 @@ define void @v_shuffle_v2i64_v2i64__u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v2i64__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v2i64__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -439,18 +439,18 @@ define void @v_shuffle_v2i64_v2i64__0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v2i64__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v2i64__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <2 x i32> zeroinitializer store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -484,19 +484,19 @@ define void @v_shuffle_v2i64_v2i64__1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v2i64__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v2i64__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -530,18 +530,18 @@ define void @v_shuffle_v2i64_v2i64__2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v2i64__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v2i64__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -571,16 +571,16 @@ define void @v_shuffle_v2i64_v2i64__u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v2i64__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v2i64__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -610,16 +610,16 @@ define void @v_shuffle_v2i64_v2i64__0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v2i64__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v2i64__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -653,18 +653,18 @@ define void @v_shuffle_v2i64_v2i64__1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v2i64__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v2i64__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -694,16 +694,16 @@ define void @v_shuffle_v2i64_v2i64__2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v2i64__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v2i64__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -744,16 +744,16 @@ define void @v_shuffle_v2i64_v2i64__0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v2i64__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v2i64__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -787,18 +787,18 @@ define void @v_shuffle_v2i64_v2i64__1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v2i64__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v2i64__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -839,16 +839,16 @@ define void @v_shuffle_v2i64_v2i64__u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v2i64__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v2i64__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <2 x i32> @@ -889,22 +889,22 @@ define void @v_shuffle_v2i64_v2i64__0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v2i64__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v2i64__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <2 x i32> @@ -945,22 +945,22 @@ define void @v_shuffle_v2i64_v2i64__1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v2i64__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v2i64__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <2 x i32> @@ -991,16 +991,16 @@ define void @v_shuffle_v2i64_v2i64__2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v2i64__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v2i64__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <2 x i32> @@ -1045,17 +1045,17 @@ define void @s_shuffle_v2i64_v2i64__0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v2i64__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v2i64__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -1089,18 +1089,18 @@ define void @s_shuffle_v2i64_v2i64__1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v2i64__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v2i64__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -1148,18 +1148,18 @@ define void @s_shuffle_v2i64_v2i64__3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v2i64__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v2i64__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <2 x i32> @@ -1204,23 +1204,23 @@ define void @s_shuffle_v2i64_v2i64__3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v2i64__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v2i64__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <2 x i32> @@ -1261,21 +1261,21 @@ define void @s_shuffle_v2i64_v2i64__3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v2i64__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v2i64__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <2 x i32> @@ -1314,20 +1314,20 @@ define void @s_shuffle_v2i64_v2i64__3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v2i64__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v2i64__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <2 x i32> @@ -1382,18 +1382,18 @@ define void @s_shuffle_v2i64_v2i64__u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v2i64__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v2i64__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -1450,20 +1450,20 @@ define void @s_shuffle_v2i64_v2i64__1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v2i64__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v2i64__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -1497,18 +1497,18 @@ define void @s_shuffle_v2i64_v2i64__2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v2i64__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v2i64__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -1538,17 +1538,17 @@ define void @s_shuffle_v2i64_v2i64__u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v2i64__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v2i64__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -1578,17 +1578,17 @@ define void @s_shuffle_v2i64_v2i64__0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v2i64__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v2i64__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -1637,17 +1637,17 @@ define void @s_shuffle_v2i64_v2i64__2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v2i64__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v2i64__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -1691,17 +1691,17 @@ define void @s_shuffle_v2i64_v2i64__0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v2i64__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v2i64__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -1735,18 +1735,18 @@ define void @s_shuffle_v2i64_v2i64__1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v2i64__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v2i64__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -1790,17 +1790,17 @@ define void @s_shuffle_v2i64_v2i64__u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v2i64__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v2i64__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <2 x i32> @@ -1841,21 +1841,21 @@ define void @s_shuffle_v2i64_v2i64__0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v2i64__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v2i64__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <2 x i32> @@ -1896,21 +1896,21 @@ define void @s_shuffle_v2i64_v2i64__1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v2i64__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v2i64__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <2 x i32> @@ -1941,17 +1941,17 @@ define void @s_shuffle_v2i64_v2i64__2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v2i64__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v2i64__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <2 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v3i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v3i64.ll index 625704f0947b2..bc8a56a30d8f9 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v3i64.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v3i64.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v2i64_v3i64__u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v2i64_v3i64__0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -77,16 +77,16 @@ define void @v_shuffle_v2i64_v3i64__1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -120,18 +120,18 @@ define void @v_shuffle_v2i64_v3i64__2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -172,16 +172,16 @@ define void @v_shuffle_v2i64_v3i64__4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -216,18 +216,18 @@ define void @v_shuffle_v2i64_v3i64__5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -272,24 +272,24 @@ define void @v_shuffle_v2i64_v3i64__5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -330,22 +330,22 @@ define void @v_shuffle_v2i64_v3i64__5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -386,22 +386,22 @@ define void @v_shuffle_v2i64_v3i64__5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -440,20 +440,20 @@ define void @v_shuffle_v2i64_v3i64__5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -488,18 +488,18 @@ define void @v_shuffle_v2i64_v3i64__5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -534,18 +534,18 @@ define void @v_shuffle_v2i64_v3i64__5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -580,18 +580,18 @@ define void @v_shuffle_v2i64_v3i64__u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -625,18 +625,18 @@ define void @v_shuffle_v2i64_v3i64__0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> zeroinitializer store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -670,18 +670,18 @@ define void @v_shuffle_v2i64_v3i64__1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -719,20 +719,20 @@ define void @v_shuffle_v2i64_v3i64__2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -766,18 +766,18 @@ define void @v_shuffle_v2i64_v3i64__3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -817,22 +817,22 @@ define void @v_shuffle_v2i64_v3i64__4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -863,16 +863,16 @@ define void @v_shuffle_v2i64_v3i64__u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -902,16 +902,16 @@ define void @v_shuffle_v2i64_v3i64__0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -945,18 +945,18 @@ define void @v_shuffle_v2i64_v3i64__1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -990,18 +990,18 @@ define void @v_shuffle_v2i64_v3i64__2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1031,16 +1031,16 @@ define void @v_shuffle_v2i64_v3i64__3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1080,22 +1080,22 @@ define void @v_shuffle_v2i64_v3i64__4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v2 -; GFX940-NEXT: v_mov_b32_e32 v9, v3 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v2 +; GFX942-NEXT: v_mov_b32_e32 v9, v3 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -1126,16 +1126,16 @@ define void @v_shuffle_v2i64_v3i64__u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1169,18 +1169,18 @@ define void @v_shuffle_v2i64_v3i64__0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1210,16 +1210,16 @@ define void @v_shuffle_v2i64_v3i64__1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1253,18 +1253,18 @@ define void @v_shuffle_v2i64_v3i64__2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1294,16 +1294,16 @@ define void @v_shuffle_v2i64_v3i64__3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1343,22 +1343,22 @@ define void @v_shuffle_v2i64_v3i64__4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v4 -; GFX940-NEXT: v_mov_b32_e32 v11, v5 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v4 +; GFX942-NEXT: v_mov_b32_e32 v11, v5 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -1400,16 +1400,16 @@ define void @v_shuffle_v2i64_v3i64__0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1439,16 +1439,16 @@ define void @v_shuffle_v2i64_v3i64__1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1482,18 +1482,18 @@ define void @v_shuffle_v2i64_v3i64__2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1538,18 +1538,18 @@ define void @v_shuffle_v2i64_v3i64__4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -1580,16 +1580,16 @@ define void @v_shuffle_v2i64_v3i64__u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -1630,22 +1630,22 @@ define void @v_shuffle_v2i64_v3i64__0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -1686,22 +1686,22 @@ define void @v_shuffle_v2i64_v3i64__1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -1742,22 +1742,22 @@ define void @v_shuffle_v2i64_v3i64__2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -1788,16 +1788,16 @@ define void @v_shuffle_v2i64_v3i64__3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -1832,18 +1832,18 @@ define void @v_shuffle_v2i64_v3i64__4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -1874,16 +1874,16 @@ define void @v_shuffle_v2i64_v3i64__u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -1924,22 +1924,22 @@ define void @v_shuffle_v2i64_v3i64__0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -1980,22 +1980,22 @@ define void @v_shuffle_v2i64_v3i64__1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v8 -; GFX940-NEXT: v_mov_b32_e32 v5, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v8 +; GFX942-NEXT: v_mov_b32_e32 v5, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -2036,22 +2036,22 @@ define void @v_shuffle_v2i64_v3i64__2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v4 -; GFX940-NEXT: v_mov_b32_e32 v9, v5 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v4 +; GFX942-NEXT: v_mov_b32_e32 v9, v5 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -2086,18 +2086,18 @@ define void @v_shuffle_v2i64_v3i64__3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -2128,16 +2128,16 @@ define void @v_shuffle_v2i64_v3i64__4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v3i64__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v3i64__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -2182,17 +2182,17 @@ define void @s_shuffle_v2i64_v3i64__0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v3i64__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v3i64__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -2226,18 +2226,18 @@ define void @s_shuffle_v2i64_v3i64__1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v3i64__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v3i64__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -2267,18 +2267,18 @@ define void @s_shuffle_v2i64_v3i64__2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v3i64__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v3i64__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -2326,18 +2326,18 @@ define void @s_shuffle_v2i64_v3i64__4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v3i64__4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v3i64__4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -2368,18 +2368,18 @@ define void @s_shuffle_v2i64_v3i64__5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v3i64__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v3i64__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -2424,21 +2424,21 @@ define void @s_shuffle_v2i64_v3i64__5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v3i64__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v3i64__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -2475,21 +2475,21 @@ define void @s_shuffle_v2i64_v3i64__5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v3i64__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v3i64__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -2530,23 +2530,23 @@ define void @s_shuffle_v2i64_v3i64__5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v3i64__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v3i64__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -2581,20 +2581,20 @@ define void @s_shuffle_v2i64_v3i64__5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v3i64__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v3i64__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -2653,20 +2653,20 @@ define void @s_shuffle_v2i64_v3i64__5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v3i64__5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v3i64__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -2701,18 +2701,18 @@ define void @s_shuffle_v2i64_v3i64__u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v3i64__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v3i64__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -2769,20 +2769,20 @@ define void @s_shuffle_v2i64_v3i64__1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v3i64__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v3i64__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -2816,20 +2816,20 @@ define void @s_shuffle_v2i64_v3i64__2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v3i64__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v3i64__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -2863,18 +2863,18 @@ define void @s_shuffle_v2i64_v3i64__3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v3i64__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v3i64__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -2918,23 +2918,23 @@ define void @s_shuffle_v2i64_v3i64__4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v3i64__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v3i64__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -2965,17 +2965,17 @@ define void @s_shuffle_v2i64_v3i64__u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v3i64__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v3i64__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -3005,17 +3005,17 @@ define void @s_shuffle_v2i64_v3i64__0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v3i64__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v3i64__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -3083,17 +3083,17 @@ define void @s_shuffle_v2i64_v3i64__3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v3i64__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v3i64__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -3133,21 +3133,21 @@ define void @s_shuffle_v2i64_v3i64__4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v3i64__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v3i64__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -3182,18 +3182,18 @@ define void @s_shuffle_v2i64_v3i64__u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v3i64__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v3i64__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -3250,20 +3250,20 @@ define void @s_shuffle_v2i64_v3i64__1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v3i64__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v3i64__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -3301,20 +3301,20 @@ define void @s_shuffle_v2i64_v3i64__2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v3i64__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v3i64__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -3348,18 +3348,18 @@ define void @s_shuffle_v2i64_v3i64__3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v3i64__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v3i64__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -3403,23 +3403,23 @@ define void @s_shuffle_v2i64_v3i64__4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v3i64__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v3i64__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -3464,17 +3464,17 @@ define void @s_shuffle_v2i64_v3i64__0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v3i64__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v3i64__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -3508,18 +3508,18 @@ define void @s_shuffle_v2i64_v3i64__1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v3i64__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v3i64__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -3549,18 +3549,18 @@ define void @s_shuffle_v2i64_v3i64__2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v3i64__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v3i64__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -3612,20 +3612,20 @@ define void @s_shuffle_v2i64_v3i64__4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v3i64__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v3i64__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -3656,17 +3656,17 @@ define void @s_shuffle_v2i64_v3i64__u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v3i64__u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v3i64__u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -3707,21 +3707,21 @@ define void @s_shuffle_v2i64_v3i64__0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v3i64__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v3i64__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -3762,21 +3762,21 @@ define void @s_shuffle_v2i64_v3i64__1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v3i64__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v3i64__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -3817,21 +3817,21 @@ define void @s_shuffle_v2i64_v3i64__2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v3i64__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v3i64__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -3862,17 +3862,17 @@ define void @s_shuffle_v2i64_v3i64__3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v3i64__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v3i64__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -3927,18 +3927,18 @@ define void @s_shuffle_v2i64_v3i64__u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v3i64__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v3i64__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -3979,21 +3979,21 @@ define void @s_shuffle_v2i64_v3i64__0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v3i64__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v3i64__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -4038,23 +4038,23 @@ define void @s_shuffle_v2i64_v3i64__1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v3i64__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v3i64__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -4095,23 +4095,23 @@ define void @s_shuffle_v2i64_v3i64__2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v3i64__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v3i64__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -4170,20 +4170,20 @@ define void @s_shuffle_v2i64_v3i64__4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v3i64__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v3i64__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v4i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v4i64.ll index b0258985bfa90..dd42a1dd44320 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v4i64.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v4i64.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v2i64_v4i64__u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v2i64_v4i64__0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -77,16 +77,16 @@ define void @v_shuffle_v2i64_v4i64__1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -116,16 +116,16 @@ define void @v_shuffle_v2i64_v4i64__2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -159,18 +159,18 @@ define void @v_shuffle_v2i64_v4i64__3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -211,16 +211,16 @@ define void @v_shuffle_v2i64_v4i64__5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -251,16 +251,16 @@ define void @v_shuffle_v2i64_v4i64__6_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -295,18 +295,18 @@ define void @v_shuffle_v2i64_v4i64__7_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -351,24 +351,24 @@ define void @v_shuffle_v2i64_v4i64__7_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -409,22 +409,22 @@ define void @v_shuffle_v2i64_v4i64__7_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -465,22 +465,22 @@ define void @v_shuffle_v2i64_v4i64__7_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -521,22 +521,22 @@ define void @v_shuffle_v2i64_v4i64__7_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -575,20 +575,20 @@ define void @v_shuffle_v2i64_v4i64__7_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -623,18 +623,18 @@ define void @v_shuffle_v2i64_v4i64__7_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -669,18 +669,18 @@ define void @v_shuffle_v2i64_v4i64__7_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -715,18 +715,18 @@ define void @v_shuffle_v2i64_v4i64__7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -761,18 +761,18 @@ define void @v_shuffle_v2i64_v4i64__u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -806,18 +806,18 @@ define void @v_shuffle_v2i64_v4i64__0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> zeroinitializer store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -851,18 +851,18 @@ define void @v_shuffle_v2i64_v4i64__1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -896,18 +896,18 @@ define void @v_shuffle_v2i64_v4i64__2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -945,20 +945,20 @@ define void @v_shuffle_v2i64_v4i64__3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -992,18 +992,18 @@ define void @v_shuffle_v2i64_v4i64__4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1043,22 +1043,22 @@ define void @v_shuffle_v2i64_v4i64__5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -1099,22 +1099,22 @@ define void @v_shuffle_v2i64_v4i64__6_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v0 -; GFX940-NEXT: v_mov_b32_e32 v9, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v0 +; GFX942-NEXT: v_mov_b32_e32 v9, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -1145,16 +1145,16 @@ define void @v_shuffle_v2i64_v4i64__u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1184,16 +1184,16 @@ define void @v_shuffle_v2i64_v4i64__0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1227,18 +1227,18 @@ define void @v_shuffle_v2i64_v4i64__1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1272,18 +1272,18 @@ define void @v_shuffle_v2i64_v4i64__2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1317,18 +1317,18 @@ define void @v_shuffle_v2i64_v4i64__3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1358,16 +1358,16 @@ define void @v_shuffle_v2i64_v4i64__4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1407,22 +1407,22 @@ define void @v_shuffle_v2i64_v4i64__5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v2 -; GFX940-NEXT: v_mov_b32_e32 v9, v3 -; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v2 +; GFX942-NEXT: v_mov_b32_e32 v9, v3 +; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -1463,22 +1463,22 @@ define void @v_shuffle_v2i64_v4i64__6_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v2 -; GFX940-NEXT: v_mov_b32_e32 v11, v3 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v2 +; GFX942-NEXT: v_mov_b32_e32 v11, v3 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -1509,16 +1509,16 @@ define void @v_shuffle_v2i64_v4i64__u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1552,18 +1552,18 @@ define void @v_shuffle_v2i64_v4i64__0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1593,16 +1593,16 @@ define void @v_shuffle_v2i64_v4i64__1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1636,18 +1636,18 @@ define void @v_shuffle_v2i64_v4i64__2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1681,18 +1681,18 @@ define void @v_shuffle_v2i64_v4i64__3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1722,16 +1722,16 @@ define void @v_shuffle_v2i64_v4i64__4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1771,22 +1771,22 @@ define void @v_shuffle_v2i64_v4i64__5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v4 -; GFX940-NEXT: v_mov_b32_e32 v11, v5 -; GFX940-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v4 +; GFX942-NEXT: v_mov_b32_e32 v11, v5 +; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -1827,22 +1827,22 @@ define void @v_shuffle_v2i64_v4i64__6_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v4 -; GFX940-NEXT: v_mov_b32_e32 v13, v5 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v4 +; GFX942-NEXT: v_mov_b32_e32 v13, v5 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -1873,16 +1873,16 @@ define void @v_shuffle_v2i64_v4i64__u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1916,18 +1916,18 @@ define void @v_shuffle_v2i64_v4i64__0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1961,18 +1961,18 @@ define void @v_shuffle_v2i64_v4i64__1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2002,16 +2002,16 @@ define void @v_shuffle_v2i64_v4i64__2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2045,18 +2045,18 @@ define void @v_shuffle_v2i64_v4i64__3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2086,16 +2086,16 @@ define void @v_shuffle_v2i64_v4i64__4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2135,22 +2135,22 @@ define void @v_shuffle_v2i64_v4i64__5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v6 -; GFX940-NEXT: v_mov_b32_e32 v13, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v6 +; GFX942-NEXT: v_mov_b32_e32 v13, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -2191,22 +2191,22 @@ define void @v_shuffle_v2i64_v4i64__6_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v14, v6 -; GFX940-NEXT: v_mov_b32_e32 v15, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v14, v6 +; GFX942-NEXT: v_mov_b32_e32 v15, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -2248,16 +2248,16 @@ define void @v_shuffle_v2i64_v4i64__0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2287,16 +2287,16 @@ define void @v_shuffle_v2i64_v4i64__1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2326,16 +2326,16 @@ define void @v_shuffle_v2i64_v4i64__2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2369,18 +2369,18 @@ define void @v_shuffle_v2i64_v4i64__3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2425,18 +2425,18 @@ define void @v_shuffle_v2i64_v4i64__5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -2471,18 +2471,18 @@ define void @v_shuffle_v2i64_v4i64__6_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -2513,16 +2513,16 @@ define void @v_shuffle_v2i64_v4i64__u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -2563,22 +2563,22 @@ define void @v_shuffle_v2i64_v4i64__0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -2619,22 +2619,22 @@ define void @v_shuffle_v2i64_v4i64__1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -2675,22 +2675,22 @@ define void @v_shuffle_v2i64_v4i64__2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -2731,22 +2731,22 @@ define void @v_shuffle_v2i64_v4i64__3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v6 -; GFX940-NEXT: v_mov_b32_e32 v9, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v6 +; GFX942-NEXT: v_mov_b32_e32 v9, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -2777,16 +2777,16 @@ define void @v_shuffle_v2i64_v4i64__4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -2821,18 +2821,18 @@ define void @v_shuffle_v2i64_v4i64__5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -2867,18 +2867,18 @@ define void @v_shuffle_v2i64_v4i64__6_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -2909,16 +2909,16 @@ define void @v_shuffle_v2i64_v4i64__u_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -2959,22 +2959,22 @@ define void @v_shuffle_v2i64_v4i64__0_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -3015,22 +3015,22 @@ define void @v_shuffle_v2i64_v4i64__1_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v8 -; GFX940-NEXT: v_mov_b32_e32 v5, v9 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v8 +; GFX942-NEXT: v_mov_b32_e32 v5, v9 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -3071,22 +3071,22 @@ define void @v_shuffle_v2i64_v4i64__2_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v10 -; GFX940-NEXT: v_mov_b32_e32 v7, v11 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v10 +; GFX942-NEXT: v_mov_b32_e32 v7, v11 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -3127,22 +3127,22 @@ define void @v_shuffle_v2i64_v4i64__3_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v6 -; GFX940-NEXT: v_mov_b32_e32 v11, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v6 +; GFX942-NEXT: v_mov_b32_e32 v11, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -3177,18 +3177,18 @@ define void @v_shuffle_v2i64_v4i64__4_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -3219,16 +3219,16 @@ define void @v_shuffle_v2i64_v4i64__5_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -3263,18 +3263,18 @@ define void @v_shuffle_v2i64_v4i64__6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -3305,16 +3305,16 @@ define void @v_shuffle_v2i64_v4i64__u_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -3355,22 +3355,22 @@ define void @v_shuffle_v2i64_v4i64__0_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -3411,22 +3411,22 @@ define void @v_shuffle_v2i64_v4i64__1_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v10 -; GFX940-NEXT: v_mov_b32_e32 v5, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v10 +; GFX942-NEXT: v_mov_b32_e32 v5, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -3467,22 +3467,22 @@ define void @v_shuffle_v2i64_v4i64__2_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v12 -; GFX940-NEXT: v_mov_b32_e32 v7, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v12 +; GFX942-NEXT: v_mov_b32_e32 v7, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -3523,22 +3523,22 @@ define void @v_shuffle_v2i64_v4i64__3_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v6 -; GFX940-NEXT: v_mov_b32_e32 v13, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v6 +; GFX942-NEXT: v_mov_b32_e32 v13, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -3573,18 +3573,18 @@ define void @v_shuffle_v2i64_v4i64__4_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -3619,18 +3619,18 @@ define void @v_shuffle_v2i64_v4i64__5_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -3661,16 +3661,16 @@ define void @v_shuffle_v2i64_v4i64__6_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v4i64__6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v4i64__6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -3715,17 +3715,17 @@ define void @s_shuffle_v2i64_v4i64__0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -3759,18 +3759,18 @@ define void @s_shuffle_v2i64_v4i64__1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -3800,17 +3800,17 @@ define void @s_shuffle_v2i64_v4i64__2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -3844,18 +3844,18 @@ define void @s_shuffle_v2i64_v4i64__3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -3903,18 +3903,18 @@ define void @s_shuffle_v2i64_v4i64__5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -3945,17 +3945,17 @@ define void @s_shuffle_v2i64_v4i64__6_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -3990,18 +3990,18 @@ define void @s_shuffle_v2i64_v4i64__7_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -4046,24 +4046,24 @@ define void @s_shuffle_v2i64_v4i64__7_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -4104,21 +4104,21 @@ define void @s_shuffle_v2i64_v4i64__7_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -4163,23 +4163,23 @@ define void @s_shuffle_v2i64_v4i64__7_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -4220,22 +4220,22 @@ define void @s_shuffle_v2i64_v4i64__7_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -4274,20 +4274,20 @@ define void @s_shuffle_v2i64_v4i64__7_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -4346,20 +4346,20 @@ define void @s_shuffle_v2i64_v4i64__7_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -4414,18 +4414,18 @@ define void @s_shuffle_v2i64_v4i64__u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -4482,20 +4482,20 @@ define void @s_shuffle_v2i64_v4i64__1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -4552,20 +4552,20 @@ define void @s_shuffle_v2i64_v4i64__3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -4599,18 +4599,18 @@ define void @s_shuffle_v2i64_v4i64__4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -4654,24 +4654,24 @@ define void @s_shuffle_v2i64_v4i64__5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -4712,22 +4712,22 @@ define void @s_shuffle_v2i64_v4i64__6_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -4758,17 +4758,17 @@ define void @s_shuffle_v2i64_v4i64__u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -4798,17 +4798,17 @@ define void @s_shuffle_v2i64_v4i64__0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -4895,17 +4895,17 @@ define void @s_shuffle_v2i64_v4i64__4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -4945,21 +4945,21 @@ define void @s_shuffle_v2i64_v4i64__5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -5000,22 +5000,22 @@ define void @s_shuffle_v2i64_v4i64__6_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -5050,18 +5050,18 @@ define void @s_shuffle_v2i64_v4i64__u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -5118,20 +5118,20 @@ define void @s_shuffle_v2i64_v4i64__1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -5188,20 +5188,20 @@ define void @s_shuffle_v2i64_v4i64__3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -5235,18 +5235,18 @@ define void @s_shuffle_v2i64_v4i64__4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -5290,23 +5290,23 @@ define void @s_shuffle_v2i64_v4i64__5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -5347,22 +5347,22 @@ define void @s_shuffle_v2i64_v4i64__6_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -5393,17 +5393,17 @@ define void @s_shuffle_v2i64_v4i64__u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -5471,17 +5471,17 @@ define void @s_shuffle_v2i64_v4i64__2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -5530,17 +5530,17 @@ define void @s_shuffle_v2i64_v4i64__4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -5580,22 +5580,22 @@ define void @s_shuffle_v2i64_v4i64__5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -5636,22 +5636,22 @@ define void @s_shuffle_v2i64_v4i64__6_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -5696,17 +5696,17 @@ define void @s_shuffle_v2i64_v4i64__0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -5740,18 +5740,18 @@ define void @s_shuffle_v2i64_v4i64__1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -5781,17 +5781,17 @@ define void @s_shuffle_v2i64_v4i64__2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -5825,18 +5825,18 @@ define void @s_shuffle_v2i64_v4i64__3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -5888,20 +5888,20 @@ define void @s_shuffle_v2i64_v4i64__5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -5952,17 +5952,17 @@ define void @s_shuffle_v2i64_v4i64__u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -6003,21 +6003,21 @@ define void @s_shuffle_v2i64_v4i64__0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -6058,21 +6058,21 @@ define void @s_shuffle_v2i64_v4i64__1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -6113,22 +6113,22 @@ define void @s_shuffle_v2i64_v4i64__2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -6169,21 +6169,21 @@ define void @s_shuffle_v2i64_v4i64__3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -6214,17 +6214,17 @@ define void @s_shuffle_v2i64_v4i64__4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -6299,18 +6299,18 @@ define void @s_shuffle_v2i64_v4i64__u_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -6351,21 +6351,21 @@ define void @s_shuffle_v2i64_v4i64__0_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -6410,23 +6410,23 @@ define void @s_shuffle_v2i64_v4i64__1_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -6467,22 +6467,22 @@ define void @s_shuffle_v2i64_v4i64__2_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -6527,23 +6527,23 @@ define void @s_shuffle_v2i64_v4i64__3_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -6602,20 +6602,20 @@ define void @s_shuffle_v2i64_v4i64__5_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -6666,17 +6666,17 @@ define void @s_shuffle_v2i64_v4i64__u_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -6717,21 +6717,21 @@ define void @s_shuffle_v2i64_v4i64__0_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -6772,22 +6772,22 @@ define void @s_shuffle_v2i64_v4i64__1_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -6828,22 +6828,22 @@ define void @s_shuffle_v2i64_v4i64__2_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -6884,22 +6884,22 @@ define void @s_shuffle_v2i64_v4i64__3_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -6970,17 +6970,17 @@ define void @s_shuffle_v2i64_v4i64__6_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v4i64__6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v4i64__6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll index b06739392e507..acc193a9393c1 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v2i64_v8i64__u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v2i64_v8i64__0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -77,16 +77,16 @@ define void @v_shuffle_v2i64_v8i64__1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -116,16 +116,16 @@ define void @v_shuffle_v2i64_v8i64__2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -155,16 +155,16 @@ define void @v_shuffle_v2i64_v8i64__3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -194,16 +194,16 @@ define void @v_shuffle_v2i64_v8i64__4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -233,16 +233,16 @@ define void @v_shuffle_v2i64_v8i64__5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -272,16 +272,16 @@ define void @v_shuffle_v2i64_v8i64__6_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -315,18 +315,18 @@ define void @v_shuffle_v2i64_v8i64__7_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v14 -; GFX940-NEXT: v_mov_b32_e32 v1, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v14 +; GFX942-NEXT: v_mov_b32_e32 v1, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -367,16 +367,16 @@ define void @v_shuffle_v2i64_v8i64__9_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__9_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__9_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -407,16 +407,16 @@ define void @v_shuffle_v2i64_v8i64__10_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__10_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__10_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -447,16 +447,16 @@ define void @v_shuffle_v2i64_v8i64__11_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__11_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__11_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -487,16 +487,16 @@ define void @v_shuffle_v2i64_v8i64__12_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__12_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__12_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -527,16 +527,16 @@ define void @v_shuffle_v2i64_v8i64__13_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__13_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__13_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -567,16 +567,16 @@ define void @v_shuffle_v2i64_v8i64__14_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__14_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__14_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -611,18 +611,18 @@ define void @v_shuffle_v2i64_v8i64__15_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__15_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v14 -; GFX940-NEXT: v_mov_b32_e32 v1, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__15_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v14 +; GFX942-NEXT: v_mov_b32_e32 v1, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -667,24 +667,24 @@ define void @v_shuffle_v2i64_v8i64__15_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__15_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v18, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v16 -; GFX940-NEXT: v_mov_b32_e32 v3, v17 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v18, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__15_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v18, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v16 +; GFX942-NEXT: v_mov_b32_e32 v3, v17 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v18, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -725,22 +725,22 @@ define void @v_shuffle_v2i64_v8i64__15_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__15_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v20, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v18 -; GFX940-NEXT: v_mov_b32_e32 v1, v19 -; GFX940-NEXT: global_store_dwordx4 v20, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__15_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v20, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v18 +; GFX942-NEXT: v_mov_b32_e32 v1, v19 +; GFX942-NEXT: global_store_dwordx4 v20, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -781,22 +781,22 @@ define void @v_shuffle_v2i64_v8i64__15_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__15_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v22, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:21] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v20 -; GFX940-NEXT: v_mov_b32_e32 v3, v21 -; GFX940-NEXT: global_store_dwordx4 v22, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__15_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v22, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:21] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v20 +; GFX942-NEXT: v_mov_b32_e32 v3, v21 +; GFX942-NEXT: global_store_dwordx4 v22, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -837,22 +837,22 @@ define void @v_shuffle_v2i64_v8i64__15_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__15_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v24, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v22 -; GFX940-NEXT: v_mov_b32_e32 v5, v23 -; GFX940-NEXT: global_store_dwordx4 v24, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__15_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v24, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v22 +; GFX942-NEXT: v_mov_b32_e32 v5, v23 +; GFX942-NEXT: global_store_dwordx4 v24, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -893,22 +893,22 @@ define void @v_shuffle_v2i64_v8i64__15_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__15_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v26, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[10:25] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v24 -; GFX940-NEXT: v_mov_b32_e32 v7, v25 -; GFX940-NEXT: global_store_dwordx4 v26, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__15_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v26, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[10:25] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v24 +; GFX942-NEXT: v_mov_b32_e32 v7, v25 +; GFX942-NEXT: global_store_dwordx4 v26, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -949,22 +949,22 @@ define void @v_shuffle_v2i64_v8i64__15_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__15_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v28, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v26 -; GFX940-NEXT: v_mov_b32_e32 v9, v27 -; GFX940-NEXT: global_store_dwordx4 v28, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__15_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v28, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v26 +; GFX942-NEXT: v_mov_b32_e32 v9, v27 +; GFX942-NEXT: global_store_dwordx4 v28, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -1005,22 +1005,22 @@ define void @v_shuffle_v2i64_v8i64__15_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__15_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v30, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[14:29] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v28 -; GFX940-NEXT: v_mov_b32_e32 v11, v29 -; GFX940-NEXT: global_store_dwordx4 v30, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__15_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v30, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[14:29] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v28 +; GFX942-NEXT: v_mov_b32_e32 v11, v29 +; GFX942-NEXT: global_store_dwordx4 v30, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -1061,22 +1061,22 @@ define void @v_shuffle_v2i64_v8i64__15_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__15_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v32, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v30 -; GFX940-NEXT: v_mov_b32_e32 v13, v31 -; GFX940-NEXT: global_store_dwordx4 v32, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__15_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v32, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v30 +; GFX942-NEXT: v_mov_b32_e32 v13, v31 +; GFX942-NEXT: global_store_dwordx4 v32, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -1115,20 +1115,20 @@ define void @v_shuffle_v2i64_v8i64__15_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__15_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v14 -; GFX940-NEXT: v_mov_b32_e32 v3, v15 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__15_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v14 +; GFX942-NEXT: v_mov_b32_e32 v3, v15 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -1163,18 +1163,18 @@ define void @v_shuffle_v2i64_v8i64__15_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__15_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v14 -; GFX940-NEXT: v_mov_b32_e32 v1, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__15_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v14 +; GFX942-NEXT: v_mov_b32_e32 v1, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -1209,18 +1209,18 @@ define void @v_shuffle_v2i64_v8i64__15_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__15_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v14 -; GFX940-NEXT: v_mov_b32_e32 v3, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__15_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v14 +; GFX942-NEXT: v_mov_b32_e32 v3, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -1255,18 +1255,18 @@ define void @v_shuffle_v2i64_v8i64__15_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__15_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__15_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -1301,18 +1301,18 @@ define void @v_shuffle_v2i64_v8i64__15_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__15_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v14 -; GFX940-NEXT: v_mov_b32_e32 v7, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__15_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v14 +; GFX942-NEXT: v_mov_b32_e32 v7, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -1347,18 +1347,18 @@ define void @v_shuffle_v2i64_v8i64__15_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__15_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v14 -; GFX940-NEXT: v_mov_b32_e32 v9, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__15_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v14 +; GFX942-NEXT: v_mov_b32_e32 v9, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -1393,18 +1393,18 @@ define void @v_shuffle_v2i64_v8i64__15_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__15_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v14 -; GFX940-NEXT: v_mov_b32_e32 v11, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__15_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v14 +; GFX942-NEXT: v_mov_b32_e32 v11, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -1439,18 +1439,18 @@ define void @v_shuffle_v2i64_v8i64__15_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__15_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__15_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -1485,18 +1485,18 @@ define void @v_shuffle_v2i64_v8i64__u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1530,18 +1530,18 @@ define void @v_shuffle_v2i64_v8i64__0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> zeroinitializer store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1575,18 +1575,18 @@ define void @v_shuffle_v2i64_v8i64__1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1620,18 +1620,18 @@ define void @v_shuffle_v2i64_v8i64__2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1665,18 +1665,18 @@ define void @v_shuffle_v2i64_v8i64__3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v0 -; GFX940-NEXT: v_mov_b32_e32 v9, v1 -; GFX940-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v0 +; GFX942-NEXT: v_mov_b32_e32 v9, v1 +; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1710,18 +1710,18 @@ define void @v_shuffle_v2i64_v8i64__4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v0 -; GFX940-NEXT: v_mov_b32_e32 v11, v1 -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v0 +; GFX942-NEXT: v_mov_b32_e32 v11, v1 +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1755,18 +1755,18 @@ define void @v_shuffle_v2i64_v8i64__5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v0 -; GFX940-NEXT: v_mov_b32_e32 v13, v1 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v0 +; GFX942-NEXT: v_mov_b32_e32 v13, v1 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1800,18 +1800,18 @@ define void @v_shuffle_v2i64_v8i64__6_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v14, v0 -; GFX940-NEXT: v_mov_b32_e32 v15, v1 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v14, v0 +; GFX942-NEXT: v_mov_b32_e32 v15, v1 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1849,20 +1849,20 @@ define void @v_shuffle_v2i64_v8i64__7_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v14 -; GFX940-NEXT: v_mov_b32_e32 v3, v15 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v14 +; GFX942-NEXT: v_mov_b32_e32 v3, v15 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1896,18 +1896,18 @@ define void @v_shuffle_v2i64_v8i64__8_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__8_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__8_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1947,22 +1947,22 @@ define void @v_shuffle_v2i64_v8i64__9_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__9_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v18, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v18, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__9_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v18, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v18, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -2003,22 +2003,22 @@ define void @v_shuffle_v2i64_v8i64__10_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__10_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v18, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v0 -; GFX940-NEXT: v_mov_b32_e32 v9, v1 -; GFX940-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__10_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v18, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v0 +; GFX942-NEXT: v_mov_b32_e32 v9, v1 +; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -2059,22 +2059,22 @@ define void @v_shuffle_v2i64_v8i64__11_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__11_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v18, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v0 -; GFX940-NEXT: v_mov_b32_e32 v11, v1 -; GFX940-NEXT: global_store_dwordx4 v18, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__11_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v18, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v0 +; GFX942-NEXT: v_mov_b32_e32 v11, v1 +; GFX942-NEXT: global_store_dwordx4 v18, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -2115,22 +2115,22 @@ define void @v_shuffle_v2i64_v8i64__12_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__12_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v18, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v0 -; GFX940-NEXT: v_mov_b32_e32 v13, v1 -; GFX940-NEXT: global_store_dwordx4 v18, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__12_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v18, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v0 +; GFX942-NEXT: v_mov_b32_e32 v13, v1 +; GFX942-NEXT: global_store_dwordx4 v18, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -2171,22 +2171,22 @@ define void @v_shuffle_v2i64_v8i64__13_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__13_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v18, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v14, v0 -; GFX940-NEXT: v_mov_b32_e32 v15, v1 -; GFX940-NEXT: global_store_dwordx4 v18, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__13_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v18, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v14, v0 +; GFX942-NEXT: v_mov_b32_e32 v15, v1 +; GFX942-NEXT: global_store_dwordx4 v18, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -2227,22 +2227,22 @@ define void @v_shuffle_v2i64_v8i64__14_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__14_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v18, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v16, v0 -; GFX940-NEXT: v_mov_b32_e32 v17, v1 -; GFX940-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__14_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v18, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v16, v0 +; GFX942-NEXT: v_mov_b32_e32 v17, v1 +; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -2273,16 +2273,16 @@ define void @v_shuffle_v2i64_v8i64__u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2312,16 +2312,16 @@ define void @v_shuffle_v2i64_v8i64__0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2355,18 +2355,18 @@ define void @v_shuffle_v2i64_v8i64__1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2400,18 +2400,18 @@ define void @v_shuffle_v2i64_v8i64__2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2445,18 +2445,18 @@ define void @v_shuffle_v2i64_v8i64__3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v2 -; GFX940-NEXT: v_mov_b32_e32 v9, v3 -; GFX940-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v2 +; GFX942-NEXT: v_mov_b32_e32 v9, v3 +; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2490,18 +2490,18 @@ define void @v_shuffle_v2i64_v8i64__4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v2 -; GFX940-NEXT: v_mov_b32_e32 v11, v3 -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v2 +; GFX942-NEXT: v_mov_b32_e32 v11, v3 +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2535,18 +2535,18 @@ define void @v_shuffle_v2i64_v8i64__5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v2 -; GFX940-NEXT: v_mov_b32_e32 v13, v3 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v2 +; GFX942-NEXT: v_mov_b32_e32 v13, v3 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2580,18 +2580,18 @@ define void @v_shuffle_v2i64_v8i64__6_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v14, v2 -; GFX940-NEXT: v_mov_b32_e32 v15, v3 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v14, v2 +; GFX942-NEXT: v_mov_b32_e32 v15, v3 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2625,18 +2625,18 @@ define void @v_shuffle_v2i64_v8i64__7_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v14 -; GFX940-NEXT: v_mov_b32_e32 v1, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v14 +; GFX942-NEXT: v_mov_b32_e32 v1, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2666,16 +2666,16 @@ define void @v_shuffle_v2i64_v8i64__8_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__8_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__8_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2715,22 +2715,22 @@ define void @v_shuffle_v2i64_v8i64__9_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__9_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v20, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v2 -; GFX940-NEXT: v_mov_b32_e32 v9, v3 -; GFX940-NEXT: global_store_dwordx4 v20, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__9_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v20, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v2 +; GFX942-NEXT: v_mov_b32_e32 v9, v3 +; GFX942-NEXT: global_store_dwordx4 v20, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -2771,22 +2771,22 @@ define void @v_shuffle_v2i64_v8i64__10_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__10_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v20, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v2 -; GFX940-NEXT: v_mov_b32_e32 v11, v3 -; GFX940-NEXT: global_store_dwordx4 v20, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__10_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v20, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v2 +; GFX942-NEXT: v_mov_b32_e32 v11, v3 +; GFX942-NEXT: global_store_dwordx4 v20, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -2827,22 +2827,22 @@ define void @v_shuffle_v2i64_v8i64__11_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__11_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v20, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v2 -; GFX940-NEXT: v_mov_b32_e32 v13, v3 -; GFX940-NEXT: global_store_dwordx4 v20, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__11_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v20, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v2 +; GFX942-NEXT: v_mov_b32_e32 v13, v3 +; GFX942-NEXT: global_store_dwordx4 v20, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -2883,22 +2883,22 @@ define void @v_shuffle_v2i64_v8i64__12_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__12_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v20, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v14, v2 -; GFX940-NEXT: v_mov_b32_e32 v15, v3 -; GFX940-NEXT: global_store_dwordx4 v20, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__12_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v20, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v14, v2 +; GFX942-NEXT: v_mov_b32_e32 v15, v3 +; GFX942-NEXT: global_store_dwordx4 v20, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -2939,22 +2939,22 @@ define void @v_shuffle_v2i64_v8i64__13_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__13_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v20, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v16, v2 -; GFX940-NEXT: v_mov_b32_e32 v17, v3 -; GFX940-NEXT: global_store_dwordx4 v20, v[14:17], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__13_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v20, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v16, v2 +; GFX942-NEXT: v_mov_b32_e32 v17, v3 +; GFX942-NEXT: global_store_dwordx4 v20, v[14:17], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -2995,22 +2995,22 @@ define void @v_shuffle_v2i64_v8i64__14_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__14_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v20, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v18, v2 -; GFX940-NEXT: v_mov_b32_e32 v19, v3 -; GFX940-NEXT: global_store_dwordx4 v20, v[16:19], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__14_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v20, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v18, v2 +; GFX942-NEXT: v_mov_b32_e32 v19, v3 +; GFX942-NEXT: global_store_dwordx4 v20, v[16:19], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -3041,16 +3041,16 @@ define void @v_shuffle_v2i64_v8i64__u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3084,18 +3084,18 @@ define void @v_shuffle_v2i64_v8i64__0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3125,16 +3125,16 @@ define void @v_shuffle_v2i64_v8i64__1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3168,18 +3168,18 @@ define void @v_shuffle_v2i64_v8i64__2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3213,18 +3213,18 @@ define void @v_shuffle_v2i64_v8i64__3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v4 -; GFX940-NEXT: v_mov_b32_e32 v9, v5 -; GFX940-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v4 +; GFX942-NEXT: v_mov_b32_e32 v9, v5 +; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3258,18 +3258,18 @@ define void @v_shuffle_v2i64_v8i64__4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v4 -; GFX940-NEXT: v_mov_b32_e32 v11, v5 -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v4 +; GFX942-NEXT: v_mov_b32_e32 v11, v5 +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3303,18 +3303,18 @@ define void @v_shuffle_v2i64_v8i64__5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v4 -; GFX940-NEXT: v_mov_b32_e32 v13, v5 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v4 +; GFX942-NEXT: v_mov_b32_e32 v13, v5 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3348,18 +3348,18 @@ define void @v_shuffle_v2i64_v8i64__6_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v14, v4 -; GFX940-NEXT: v_mov_b32_e32 v15, v5 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v14, v4 +; GFX942-NEXT: v_mov_b32_e32 v15, v5 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3393,18 +3393,18 @@ define void @v_shuffle_v2i64_v8i64__7_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v14 -; GFX940-NEXT: v_mov_b32_e32 v3, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v14 +; GFX942-NEXT: v_mov_b32_e32 v3, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3434,16 +3434,16 @@ define void @v_shuffle_v2i64_v8i64__8_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__8_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__8_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3483,22 +3483,22 @@ define void @v_shuffle_v2i64_v8i64__9_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__9_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v22, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:21] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v4 -; GFX940-NEXT: v_mov_b32_e32 v11, v5 -; GFX940-NEXT: global_store_dwordx4 v22, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__9_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v22, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:21] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v4 +; GFX942-NEXT: v_mov_b32_e32 v11, v5 +; GFX942-NEXT: global_store_dwordx4 v22, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -3539,22 +3539,22 @@ define void @v_shuffle_v2i64_v8i64__10_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__10_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v22, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:21] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v4 -; GFX940-NEXT: v_mov_b32_e32 v13, v5 -; GFX940-NEXT: global_store_dwordx4 v22, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__10_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v22, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:21] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v4 +; GFX942-NEXT: v_mov_b32_e32 v13, v5 +; GFX942-NEXT: global_store_dwordx4 v22, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -3595,22 +3595,22 @@ define void @v_shuffle_v2i64_v8i64__11_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__11_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v22, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:21] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v14, v4 -; GFX940-NEXT: v_mov_b32_e32 v15, v5 -; GFX940-NEXT: global_store_dwordx4 v22, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__11_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v22, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:21] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v14, v4 +; GFX942-NEXT: v_mov_b32_e32 v15, v5 +; GFX942-NEXT: global_store_dwordx4 v22, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -3651,22 +3651,22 @@ define void @v_shuffle_v2i64_v8i64__12_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__12_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v22, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:21] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v16, v4 -; GFX940-NEXT: v_mov_b32_e32 v17, v5 -; GFX940-NEXT: global_store_dwordx4 v22, v[14:17], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__12_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v22, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:21] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v16, v4 +; GFX942-NEXT: v_mov_b32_e32 v17, v5 +; GFX942-NEXT: global_store_dwordx4 v22, v[14:17], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -3707,22 +3707,22 @@ define void @v_shuffle_v2i64_v8i64__13_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__13_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v22, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:21] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v18, v4 -; GFX940-NEXT: v_mov_b32_e32 v19, v5 -; GFX940-NEXT: global_store_dwordx4 v22, v[16:19], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__13_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v22, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:21] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v18, v4 +; GFX942-NEXT: v_mov_b32_e32 v19, v5 +; GFX942-NEXT: global_store_dwordx4 v22, v[16:19], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -3763,22 +3763,22 @@ define void @v_shuffle_v2i64_v8i64__14_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__14_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v22, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:21] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v20, v4 -; GFX940-NEXT: v_mov_b32_e32 v21, v5 -; GFX940-NEXT: global_store_dwordx4 v22, v[18:21], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__14_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v22, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:21] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v20, v4 +; GFX942-NEXT: v_mov_b32_e32 v21, v5 +; GFX942-NEXT: global_store_dwordx4 v22, v[18:21], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -3809,16 +3809,16 @@ define void @v_shuffle_v2i64_v8i64__u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3852,18 +3852,18 @@ define void @v_shuffle_v2i64_v8i64__0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3897,18 +3897,18 @@ define void @v_shuffle_v2i64_v8i64__1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3938,16 +3938,16 @@ define void @v_shuffle_v2i64_v8i64__2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3981,18 +3981,18 @@ define void @v_shuffle_v2i64_v8i64__3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v6 -; GFX940-NEXT: v_mov_b32_e32 v9, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v6 +; GFX942-NEXT: v_mov_b32_e32 v9, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4026,18 +4026,18 @@ define void @v_shuffle_v2i64_v8i64__4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v6 -; GFX940-NEXT: v_mov_b32_e32 v11, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v6 +; GFX942-NEXT: v_mov_b32_e32 v11, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4071,18 +4071,18 @@ define void @v_shuffle_v2i64_v8i64__5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v6 -; GFX940-NEXT: v_mov_b32_e32 v13, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v6 +; GFX942-NEXT: v_mov_b32_e32 v13, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4116,18 +4116,18 @@ define void @v_shuffle_v2i64_v8i64__6_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v14, v6 -; GFX940-NEXT: v_mov_b32_e32 v15, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v14, v6 +; GFX942-NEXT: v_mov_b32_e32 v15, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4161,18 +4161,18 @@ define void @v_shuffle_v2i64_v8i64__7_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4202,16 +4202,16 @@ define void @v_shuffle_v2i64_v8i64__8_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__8_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__8_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4251,22 +4251,22 @@ define void @v_shuffle_v2i64_v8i64__9_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__9_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v24, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v6 -; GFX940-NEXT: v_mov_b32_e32 v13, v7 -; GFX940-NEXT: global_store_dwordx4 v24, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__9_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v24, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v6 +; GFX942-NEXT: v_mov_b32_e32 v13, v7 +; GFX942-NEXT: global_store_dwordx4 v24, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -4307,22 +4307,22 @@ define void @v_shuffle_v2i64_v8i64__10_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__10_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v24, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v14, v6 -; GFX940-NEXT: v_mov_b32_e32 v15, v7 -; GFX940-NEXT: global_store_dwordx4 v24, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__10_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v24, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v14, v6 +; GFX942-NEXT: v_mov_b32_e32 v15, v7 +; GFX942-NEXT: global_store_dwordx4 v24, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -4363,22 +4363,22 @@ define void @v_shuffle_v2i64_v8i64__11_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__11_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v24, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v16, v6 -; GFX940-NEXT: v_mov_b32_e32 v17, v7 -; GFX940-NEXT: global_store_dwordx4 v24, v[14:17], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__11_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v24, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v16, v6 +; GFX942-NEXT: v_mov_b32_e32 v17, v7 +; GFX942-NEXT: global_store_dwordx4 v24, v[14:17], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -4419,22 +4419,22 @@ define void @v_shuffle_v2i64_v8i64__12_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__12_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v24, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v18, v6 -; GFX940-NEXT: v_mov_b32_e32 v19, v7 -; GFX940-NEXT: global_store_dwordx4 v24, v[16:19], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__12_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v24, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v18, v6 +; GFX942-NEXT: v_mov_b32_e32 v19, v7 +; GFX942-NEXT: global_store_dwordx4 v24, v[16:19], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -4475,22 +4475,22 @@ define void @v_shuffle_v2i64_v8i64__13_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__13_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v24, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v20, v6 -; GFX940-NEXT: v_mov_b32_e32 v21, v7 -; GFX940-NEXT: global_store_dwordx4 v24, v[18:21], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__13_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v24, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v20, v6 +; GFX942-NEXT: v_mov_b32_e32 v21, v7 +; GFX942-NEXT: global_store_dwordx4 v24, v[18:21], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -4531,22 +4531,22 @@ define void @v_shuffle_v2i64_v8i64__14_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__14_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v24, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v22, v6 -; GFX940-NEXT: v_mov_b32_e32 v23, v7 -; GFX940-NEXT: global_store_dwordx4 v24, v[20:23], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__14_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v24, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v22, v6 +; GFX942-NEXT: v_mov_b32_e32 v23, v7 +; GFX942-NEXT: global_store_dwordx4 v24, v[20:23], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -4577,16 +4577,16 @@ define void @v_shuffle_v2i64_v8i64__u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4620,18 +4620,18 @@ define void @v_shuffle_v2i64_v8i64__0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4665,18 +4665,18 @@ define void @v_shuffle_v2i64_v8i64__1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v8 -; GFX940-NEXT: v_mov_b32_e32 v5, v9 -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v8 +; GFX942-NEXT: v_mov_b32_e32 v5, v9 +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4710,18 +4710,18 @@ define void @v_shuffle_v2i64_v8i64__2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4751,16 +4751,16 @@ define void @v_shuffle_v2i64_v8i64__3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4794,18 +4794,18 @@ define void @v_shuffle_v2i64_v8i64__4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v8 -; GFX940-NEXT: v_mov_b32_e32 v11, v9 -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v8 +; GFX942-NEXT: v_mov_b32_e32 v11, v9 +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4839,18 +4839,18 @@ define void @v_shuffle_v2i64_v8i64__5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v8 -; GFX940-NEXT: v_mov_b32_e32 v13, v9 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v8 +; GFX942-NEXT: v_mov_b32_e32 v13, v9 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4884,18 +4884,18 @@ define void @v_shuffle_v2i64_v8i64__6_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v14, v8 -; GFX940-NEXT: v_mov_b32_e32 v15, v9 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v14, v8 +; GFX942-NEXT: v_mov_b32_e32 v15, v9 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4929,18 +4929,18 @@ define void @v_shuffle_v2i64_v8i64__7_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v14 -; GFX940-NEXT: v_mov_b32_e32 v7, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v14 +; GFX942-NEXT: v_mov_b32_e32 v7, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4970,16 +4970,16 @@ define void @v_shuffle_v2i64_v8i64__8_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__8_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__8_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -5019,22 +5019,22 @@ define void @v_shuffle_v2i64_v8i64__9_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__9_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v26, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[10:25] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v14, v8 -; GFX940-NEXT: v_mov_b32_e32 v15, v9 -; GFX940-NEXT: global_store_dwordx4 v26, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__9_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v26, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[10:25] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v14, v8 +; GFX942-NEXT: v_mov_b32_e32 v15, v9 +; GFX942-NEXT: global_store_dwordx4 v26, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -5075,22 +5075,22 @@ define void @v_shuffle_v2i64_v8i64__10_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__10_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v26, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[10:25] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v16, v8 -; GFX940-NEXT: v_mov_b32_e32 v17, v9 -; GFX940-NEXT: global_store_dwordx4 v26, v[14:17], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__10_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v26, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[10:25] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v16, v8 +; GFX942-NEXT: v_mov_b32_e32 v17, v9 +; GFX942-NEXT: global_store_dwordx4 v26, v[14:17], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -5131,22 +5131,22 @@ define void @v_shuffle_v2i64_v8i64__11_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__11_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v26, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[10:25] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v18, v8 -; GFX940-NEXT: v_mov_b32_e32 v19, v9 -; GFX940-NEXT: global_store_dwordx4 v26, v[16:19], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__11_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v26, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[10:25] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v18, v8 +; GFX942-NEXT: v_mov_b32_e32 v19, v9 +; GFX942-NEXT: global_store_dwordx4 v26, v[16:19], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -5187,22 +5187,22 @@ define void @v_shuffle_v2i64_v8i64__12_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__12_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v26, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[10:25] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v20, v8 -; GFX940-NEXT: v_mov_b32_e32 v21, v9 -; GFX940-NEXT: global_store_dwordx4 v26, v[18:21], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__12_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v26, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[10:25] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v20, v8 +; GFX942-NEXT: v_mov_b32_e32 v21, v9 +; GFX942-NEXT: global_store_dwordx4 v26, v[18:21], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -5243,22 +5243,22 @@ define void @v_shuffle_v2i64_v8i64__13_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__13_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v26, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[10:25] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v22, v8 -; GFX940-NEXT: v_mov_b32_e32 v23, v9 -; GFX940-NEXT: global_store_dwordx4 v26, v[20:23], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__13_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v26, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[10:25] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v22, v8 +; GFX942-NEXT: v_mov_b32_e32 v23, v9 +; GFX942-NEXT: global_store_dwordx4 v26, v[20:23], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -5299,22 +5299,22 @@ define void @v_shuffle_v2i64_v8i64__14_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__14_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v26, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[10:25] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v24, v8 -; GFX940-NEXT: v_mov_b32_e32 v25, v9 -; GFX940-NEXT: global_store_dwordx4 v26, v[22:25], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__14_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v26, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[10:25] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v24, v8 +; GFX942-NEXT: v_mov_b32_e32 v25, v9 +; GFX942-NEXT: global_store_dwordx4 v26, v[22:25], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -5345,16 +5345,16 @@ define void @v_shuffle_v2i64_v8i64__u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -5388,18 +5388,18 @@ define void @v_shuffle_v2i64_v8i64__0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -5433,18 +5433,18 @@ define void @v_shuffle_v2i64_v8i64__1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v10 -; GFX940-NEXT: v_mov_b32_e32 v5, v11 -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v10 +; GFX942-NEXT: v_mov_b32_e32 v5, v11 +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -5478,18 +5478,18 @@ define void @v_shuffle_v2i64_v8i64__2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v10 -; GFX940-NEXT: v_mov_b32_e32 v7, v11 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v10 +; GFX942-NEXT: v_mov_b32_e32 v7, v11 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -5523,18 +5523,18 @@ define void @v_shuffle_v2i64_v8i64__3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -5564,16 +5564,16 @@ define void @v_shuffle_v2i64_v8i64__4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -5607,18 +5607,18 @@ define void @v_shuffle_v2i64_v8i64__5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v10 -; GFX940-NEXT: v_mov_b32_e32 v13, v11 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v10 +; GFX942-NEXT: v_mov_b32_e32 v13, v11 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -5652,18 +5652,18 @@ define void @v_shuffle_v2i64_v8i64__6_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v14, v10 -; GFX940-NEXT: v_mov_b32_e32 v15, v11 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v14, v10 +; GFX942-NEXT: v_mov_b32_e32 v15, v11 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -5697,18 +5697,18 @@ define void @v_shuffle_v2i64_v8i64__7_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v14 -; GFX940-NEXT: v_mov_b32_e32 v9, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v14 +; GFX942-NEXT: v_mov_b32_e32 v9, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -5738,16 +5738,16 @@ define void @v_shuffle_v2i64_v8i64__8_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__8_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__8_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -5787,22 +5787,22 @@ define void @v_shuffle_v2i64_v8i64__9_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__9_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v28, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v16, v10 -; GFX940-NEXT: v_mov_b32_e32 v17, v11 -; GFX940-NEXT: global_store_dwordx4 v28, v[14:17], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__9_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v28, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v16, v10 +; GFX942-NEXT: v_mov_b32_e32 v17, v11 +; GFX942-NEXT: global_store_dwordx4 v28, v[14:17], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -5843,22 +5843,22 @@ define void @v_shuffle_v2i64_v8i64__10_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__10_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v28, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v18, v10 -; GFX940-NEXT: v_mov_b32_e32 v19, v11 -; GFX940-NEXT: global_store_dwordx4 v28, v[16:19], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__10_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v28, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v18, v10 +; GFX942-NEXT: v_mov_b32_e32 v19, v11 +; GFX942-NEXT: global_store_dwordx4 v28, v[16:19], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -5899,22 +5899,22 @@ define void @v_shuffle_v2i64_v8i64__11_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__11_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v28, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v20, v10 -; GFX940-NEXT: v_mov_b32_e32 v21, v11 -; GFX940-NEXT: global_store_dwordx4 v28, v[18:21], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__11_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v28, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v20, v10 +; GFX942-NEXT: v_mov_b32_e32 v21, v11 +; GFX942-NEXT: global_store_dwordx4 v28, v[18:21], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -5955,22 +5955,22 @@ define void @v_shuffle_v2i64_v8i64__12_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__12_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v28, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v22, v10 -; GFX940-NEXT: v_mov_b32_e32 v23, v11 -; GFX940-NEXT: global_store_dwordx4 v28, v[20:23], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__12_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v28, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v22, v10 +; GFX942-NEXT: v_mov_b32_e32 v23, v11 +; GFX942-NEXT: global_store_dwordx4 v28, v[20:23], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -6011,22 +6011,22 @@ define void @v_shuffle_v2i64_v8i64__13_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__13_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v28, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v24, v10 -; GFX940-NEXT: v_mov_b32_e32 v25, v11 -; GFX940-NEXT: global_store_dwordx4 v28, v[22:25], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__13_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v28, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v24, v10 +; GFX942-NEXT: v_mov_b32_e32 v25, v11 +; GFX942-NEXT: global_store_dwordx4 v28, v[22:25], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -6067,22 +6067,22 @@ define void @v_shuffle_v2i64_v8i64__14_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__14_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v28, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v26, v10 -; GFX940-NEXT: v_mov_b32_e32 v27, v11 -; GFX940-NEXT: global_store_dwordx4 v28, v[24:27], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__14_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v28, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v26, v10 +; GFX942-NEXT: v_mov_b32_e32 v27, v11 +; GFX942-NEXT: global_store_dwordx4 v28, v[24:27], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -6113,16 +6113,16 @@ define void @v_shuffle_v2i64_v8i64__u_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -6156,18 +6156,18 @@ define void @v_shuffle_v2i64_v8i64__0_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -6201,18 +6201,18 @@ define void @v_shuffle_v2i64_v8i64__1_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v12 -; GFX940-NEXT: v_mov_b32_e32 v5, v13 -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v12 +; GFX942-NEXT: v_mov_b32_e32 v5, v13 +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -6246,18 +6246,18 @@ define void @v_shuffle_v2i64_v8i64__2_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v12 -; GFX940-NEXT: v_mov_b32_e32 v7, v13 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v12 +; GFX942-NEXT: v_mov_b32_e32 v7, v13 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -6291,18 +6291,18 @@ define void @v_shuffle_v2i64_v8i64__3_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v12 -; GFX940-NEXT: v_mov_b32_e32 v9, v13 -; GFX940-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v12 +; GFX942-NEXT: v_mov_b32_e32 v9, v13 +; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -6336,18 +6336,18 @@ define void @v_shuffle_v2i64_v8i64__4_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -6377,16 +6377,16 @@ define void @v_shuffle_v2i64_v8i64__5_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -6420,18 +6420,18 @@ define void @v_shuffle_v2i64_v8i64__6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v14, v12 -; GFX940-NEXT: v_mov_b32_e32 v15, v13 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v14, v12 +; GFX942-NEXT: v_mov_b32_e32 v15, v13 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -6465,18 +6465,18 @@ define void @v_shuffle_v2i64_v8i64__7_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v14 -; GFX940-NEXT: v_mov_b32_e32 v11, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v14 +; GFX942-NEXT: v_mov_b32_e32 v11, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -6506,16 +6506,16 @@ define void @v_shuffle_v2i64_v8i64__8_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__8_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__8_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -6555,22 +6555,22 @@ define void @v_shuffle_v2i64_v8i64__9_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__9_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v30, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[14:29] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v18, v12 -; GFX940-NEXT: v_mov_b32_e32 v19, v13 -; GFX940-NEXT: global_store_dwordx4 v30, v[16:19], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__9_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v30, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[14:29] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v18, v12 +; GFX942-NEXT: v_mov_b32_e32 v19, v13 +; GFX942-NEXT: global_store_dwordx4 v30, v[16:19], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -6611,22 +6611,22 @@ define void @v_shuffle_v2i64_v8i64__10_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__10_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v30, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[14:29] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v20, v12 -; GFX940-NEXT: v_mov_b32_e32 v21, v13 -; GFX940-NEXT: global_store_dwordx4 v30, v[18:21], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__10_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v30, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[14:29] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v20, v12 +; GFX942-NEXT: v_mov_b32_e32 v21, v13 +; GFX942-NEXT: global_store_dwordx4 v30, v[18:21], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -6667,22 +6667,22 @@ define void @v_shuffle_v2i64_v8i64__11_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__11_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v30, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[14:29] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v22, v12 -; GFX940-NEXT: v_mov_b32_e32 v23, v13 -; GFX940-NEXT: global_store_dwordx4 v30, v[20:23], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__11_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v30, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[14:29] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v22, v12 +; GFX942-NEXT: v_mov_b32_e32 v23, v13 +; GFX942-NEXT: global_store_dwordx4 v30, v[20:23], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -6723,22 +6723,22 @@ define void @v_shuffle_v2i64_v8i64__12_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__12_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v30, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[14:29] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v24, v12 -; GFX940-NEXT: v_mov_b32_e32 v25, v13 -; GFX940-NEXT: global_store_dwordx4 v30, v[22:25], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__12_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v30, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[14:29] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v24, v12 +; GFX942-NEXT: v_mov_b32_e32 v25, v13 +; GFX942-NEXT: global_store_dwordx4 v30, v[22:25], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -6779,22 +6779,22 @@ define void @v_shuffle_v2i64_v8i64__13_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__13_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v30, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[14:29] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v26, v12 -; GFX940-NEXT: v_mov_b32_e32 v27, v13 -; GFX940-NEXT: global_store_dwordx4 v30, v[24:27], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__13_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v30, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[14:29] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v26, v12 +; GFX942-NEXT: v_mov_b32_e32 v27, v13 +; GFX942-NEXT: global_store_dwordx4 v30, v[24:27], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -6835,22 +6835,22 @@ define void @v_shuffle_v2i64_v8i64__14_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__14_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v30, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[14:29] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v28, v12 -; GFX940-NEXT: v_mov_b32_e32 v29, v13 -; GFX940-NEXT: global_store_dwordx4 v30, v[26:29], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__14_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v30, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[14:29] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v28, v12 +; GFX942-NEXT: v_mov_b32_e32 v29, v13 +; GFX942-NEXT: global_store_dwordx4 v30, v[26:29], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -6881,16 +6881,16 @@ define void @v_shuffle_v2i64_v8i64__u_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -6924,18 +6924,18 @@ define void @v_shuffle_v2i64_v8i64__0_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v14 -; GFX940-NEXT: v_mov_b32_e32 v3, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v14 +; GFX942-NEXT: v_mov_b32_e32 v3, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -6969,18 +6969,18 @@ define void @v_shuffle_v2i64_v8i64__1_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -7014,18 +7014,18 @@ define void @v_shuffle_v2i64_v8i64__2_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v14 -; GFX940-NEXT: v_mov_b32_e32 v7, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v14 +; GFX942-NEXT: v_mov_b32_e32 v7, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -7059,18 +7059,18 @@ define void @v_shuffle_v2i64_v8i64__3_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v14 -; GFX940-NEXT: v_mov_b32_e32 v9, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v14 +; GFX942-NEXT: v_mov_b32_e32 v9, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -7104,18 +7104,18 @@ define void @v_shuffle_v2i64_v8i64__4_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v14 -; GFX940-NEXT: v_mov_b32_e32 v11, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v14 +; GFX942-NEXT: v_mov_b32_e32 v11, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -7149,18 +7149,18 @@ define void @v_shuffle_v2i64_v8i64__5_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -7190,16 +7190,16 @@ define void @v_shuffle_v2i64_v8i64__6_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -7233,18 +7233,18 @@ define void @v_shuffle_v2i64_v8i64__7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -7274,16 +7274,16 @@ define void @v_shuffle_v2i64_v8i64__8_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__8_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__8_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -7323,22 +7323,22 @@ define void @v_shuffle_v2i64_v8i64__9_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__9_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v32, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v20, v14 -; GFX940-NEXT: v_mov_b32_e32 v21, v15 -; GFX940-NEXT: global_store_dwordx4 v32, v[18:21], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__9_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v32, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v20, v14 +; GFX942-NEXT: v_mov_b32_e32 v21, v15 +; GFX942-NEXT: global_store_dwordx4 v32, v[18:21], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -7379,22 +7379,22 @@ define void @v_shuffle_v2i64_v8i64__10_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__10_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v32, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v22, v14 -; GFX940-NEXT: v_mov_b32_e32 v23, v15 -; GFX940-NEXT: global_store_dwordx4 v32, v[20:23], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__10_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v32, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v22, v14 +; GFX942-NEXT: v_mov_b32_e32 v23, v15 +; GFX942-NEXT: global_store_dwordx4 v32, v[20:23], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -7435,22 +7435,22 @@ define void @v_shuffle_v2i64_v8i64__11_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__11_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v32, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v24, v14 -; GFX940-NEXT: v_mov_b32_e32 v25, v15 -; GFX940-NEXT: global_store_dwordx4 v32, v[22:25], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__11_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v32, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v24, v14 +; GFX942-NEXT: v_mov_b32_e32 v25, v15 +; GFX942-NEXT: global_store_dwordx4 v32, v[22:25], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -7491,22 +7491,22 @@ define void @v_shuffle_v2i64_v8i64__12_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__12_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v32, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v26, v14 -; GFX940-NEXT: v_mov_b32_e32 v27, v15 -; GFX940-NEXT: global_store_dwordx4 v32, v[24:27], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__12_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v32, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v26, v14 +; GFX942-NEXT: v_mov_b32_e32 v27, v15 +; GFX942-NEXT: global_store_dwordx4 v32, v[24:27], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -7547,22 +7547,22 @@ define void @v_shuffle_v2i64_v8i64__13_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__13_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v32, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v28, v14 -; GFX940-NEXT: v_mov_b32_e32 v29, v15 -; GFX940-NEXT: global_store_dwordx4 v32, v[26:29], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__13_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v32, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v28, v14 +; GFX942-NEXT: v_mov_b32_e32 v29, v15 +; GFX942-NEXT: global_store_dwordx4 v32, v[26:29], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -7603,22 +7603,22 @@ define void @v_shuffle_v2i64_v8i64__14_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__14_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v32, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v30, v14 -; GFX940-NEXT: v_mov_b32_e32 v31, v15 -; GFX940-NEXT: global_store_dwordx4 v32, v[28:31], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__14_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v32, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v30, v14 +; GFX942-NEXT: v_mov_b32_e32 v31, v15 +; GFX942-NEXT: global_store_dwordx4 v32, v[28:31], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -7660,16 +7660,16 @@ define void @v_shuffle_v2i64_v8i64__0_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__0_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__0_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -7699,16 +7699,16 @@ define void @v_shuffle_v2i64_v8i64__1_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__1_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__1_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -7738,16 +7738,16 @@ define void @v_shuffle_v2i64_v8i64__2_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__2_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__2_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -7777,16 +7777,16 @@ define void @v_shuffle_v2i64_v8i64__3_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__3_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__3_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -7816,16 +7816,16 @@ define void @v_shuffle_v2i64_v8i64__4_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__4_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__4_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -7855,16 +7855,16 @@ define void @v_shuffle_v2i64_v8i64__5_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__5_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__5_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -7894,16 +7894,16 @@ define void @v_shuffle_v2i64_v8i64__6_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__6_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__6_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -7937,18 +7937,18 @@ define void @v_shuffle_v2i64_v8i64__7_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__7_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v14 -; GFX940-NEXT: v_mov_b32_e32 v1, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__7_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v14 +; GFX942-NEXT: v_mov_b32_e32 v1, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> store <2 x i64> %shuf, ptr addrspace(1) %ptr, align 16 @@ -7993,18 +7993,18 @@ define void @v_shuffle_v2i64_v8i64__9_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__9_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__9_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -8039,18 +8039,18 @@ define void @v_shuffle_v2i64_v8i64__10_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__10_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__10_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -8085,18 +8085,18 @@ define void @v_shuffle_v2i64_v8i64__11_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__11_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v0 -; GFX940-NEXT: v_mov_b32_e32 v9, v1 -; GFX940-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__11_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v0 +; GFX942-NEXT: v_mov_b32_e32 v9, v1 +; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -8131,18 +8131,18 @@ define void @v_shuffle_v2i64_v8i64__12_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__12_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v0 -; GFX940-NEXT: v_mov_b32_e32 v11, v1 -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__12_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v0 +; GFX942-NEXT: v_mov_b32_e32 v11, v1 +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -8177,18 +8177,18 @@ define void @v_shuffle_v2i64_v8i64__13_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__13_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v0 -; GFX940-NEXT: v_mov_b32_e32 v13, v1 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__13_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v0 +; GFX942-NEXT: v_mov_b32_e32 v13, v1 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -8223,18 +8223,18 @@ define void @v_shuffle_v2i64_v8i64__14_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__14_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v14, v0 -; GFX940-NEXT: v_mov_b32_e32 v15, v1 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__14_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v14, v0 +; GFX942-NEXT: v_mov_b32_e32 v15, v1 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -8265,16 +8265,16 @@ define void @v_shuffle_v2i64_v8i64__u_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__u_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__u_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -8315,22 +8315,22 @@ define void @v_shuffle_v2i64_v8i64__0_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__0_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v18, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__0_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v18, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -8371,22 +8371,22 @@ define void @v_shuffle_v2i64_v8i64__1_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__1_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v20, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v20, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__1_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v20, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v20, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -8427,22 +8427,22 @@ define void @v_shuffle_v2i64_v8i64__2_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__2_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v22, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:21] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v22, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__2_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v22, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:21] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v22, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -8483,22 +8483,22 @@ define void @v_shuffle_v2i64_v8i64__3_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__3_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v24, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v24, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__3_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v24, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v24, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -8539,22 +8539,22 @@ define void @v_shuffle_v2i64_v8i64__4_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__4_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v26, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[10:25] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: global_store_dwordx4 v26, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__4_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v26, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[10:25] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: global_store_dwordx4 v26, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -8595,22 +8595,22 @@ define void @v_shuffle_v2i64_v8i64__5_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__5_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v28, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: global_store_dwordx4 v28, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__5_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v28, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: global_store_dwordx4 v28, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -8651,22 +8651,22 @@ define void @v_shuffle_v2i64_v8i64__6_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__6_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v30, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[14:29] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v14, v16 -; GFX940-NEXT: v_mov_b32_e32 v15, v17 -; GFX940-NEXT: global_store_dwordx4 v30, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__6_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v30, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[14:29] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v14, v16 +; GFX942-NEXT: v_mov_b32_e32 v15, v17 +; GFX942-NEXT: global_store_dwordx4 v30, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -8707,22 +8707,22 @@ define void @v_shuffle_v2i64_v8i64__7_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__7_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v32, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v16, v14 -; GFX940-NEXT: v_mov_b32_e32 v17, v15 -; GFX940-NEXT: global_store_dwordx4 v32, v[16:19], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__7_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v32, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v16, v14 +; GFX942-NEXT: v_mov_b32_e32 v17, v15 +; GFX942-NEXT: global_store_dwordx4 v32, v[16:19], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -8753,16 +8753,16 @@ define void @v_shuffle_v2i64_v8i64__8_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__8_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__8_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -8797,18 +8797,18 @@ define void @v_shuffle_v2i64_v8i64__9_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__9_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__9_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -8843,18 +8843,18 @@ define void @v_shuffle_v2i64_v8i64__10_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__10_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__10_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -8889,18 +8889,18 @@ define void @v_shuffle_v2i64_v8i64__11_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__11_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v2 -; GFX940-NEXT: v_mov_b32_e32 v9, v3 -; GFX940-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__11_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v2 +; GFX942-NEXT: v_mov_b32_e32 v9, v3 +; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -8935,18 +8935,18 @@ define void @v_shuffle_v2i64_v8i64__12_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__12_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v2 -; GFX940-NEXT: v_mov_b32_e32 v11, v3 -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__12_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v2 +; GFX942-NEXT: v_mov_b32_e32 v11, v3 +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -8981,18 +8981,18 @@ define void @v_shuffle_v2i64_v8i64__13_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__13_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v2 -; GFX940-NEXT: v_mov_b32_e32 v13, v3 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__13_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v2 +; GFX942-NEXT: v_mov_b32_e32 v13, v3 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -9027,18 +9027,18 @@ define void @v_shuffle_v2i64_v8i64__14_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__14_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v14, v2 -; GFX940-NEXT: v_mov_b32_e32 v15, v3 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__14_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v14, v2 +; GFX942-NEXT: v_mov_b32_e32 v15, v3 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -9069,16 +9069,16 @@ define void @v_shuffle_v2i64_v8i64__u_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__u_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__u_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -9119,22 +9119,22 @@ define void @v_shuffle_v2i64_v8i64__0_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__0_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v18, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__0_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v18, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -9175,22 +9175,22 @@ define void @v_shuffle_v2i64_v8i64__1_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__1_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v20, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v8 -; GFX940-NEXT: v_mov_b32_e32 v5, v9 -; GFX940-NEXT: global_store_dwordx4 v20, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__1_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v20, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v8 +; GFX942-NEXT: v_mov_b32_e32 v5, v9 +; GFX942-NEXT: global_store_dwordx4 v20, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -9231,22 +9231,22 @@ define void @v_shuffle_v2i64_v8i64__2_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__2_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v22, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:21] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v10 -; GFX940-NEXT: v_mov_b32_e32 v7, v11 -; GFX940-NEXT: global_store_dwordx4 v22, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__2_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v22, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:21] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v10 +; GFX942-NEXT: v_mov_b32_e32 v7, v11 +; GFX942-NEXT: global_store_dwordx4 v22, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -9287,22 +9287,22 @@ define void @v_shuffle_v2i64_v8i64__3_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__3_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v24, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v12 -; GFX940-NEXT: v_mov_b32_e32 v9, v13 -; GFX940-NEXT: global_store_dwordx4 v24, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__3_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v24, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v12 +; GFX942-NEXT: v_mov_b32_e32 v9, v13 +; GFX942-NEXT: global_store_dwordx4 v24, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -9343,22 +9343,22 @@ define void @v_shuffle_v2i64_v8i64__4_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__4_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v26, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[10:25] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v14 -; GFX940-NEXT: v_mov_b32_e32 v11, v15 -; GFX940-NEXT: global_store_dwordx4 v26, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__4_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v26, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[10:25] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v14 +; GFX942-NEXT: v_mov_b32_e32 v11, v15 +; GFX942-NEXT: global_store_dwordx4 v26, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -9399,22 +9399,22 @@ define void @v_shuffle_v2i64_v8i64__5_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__5_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v28, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v16 -; GFX940-NEXT: v_mov_b32_e32 v13, v17 -; GFX940-NEXT: global_store_dwordx4 v28, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__5_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v28, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v16 +; GFX942-NEXT: v_mov_b32_e32 v13, v17 +; GFX942-NEXT: global_store_dwordx4 v28, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -9455,22 +9455,22 @@ define void @v_shuffle_v2i64_v8i64__6_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__6_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v30, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[14:29] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v14, v18 -; GFX940-NEXT: v_mov_b32_e32 v15, v19 -; GFX940-NEXT: global_store_dwordx4 v30, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__6_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v30, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[14:29] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v14, v18 +; GFX942-NEXT: v_mov_b32_e32 v15, v19 +; GFX942-NEXT: global_store_dwordx4 v30, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -9511,22 +9511,22 @@ define void @v_shuffle_v2i64_v8i64__7_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__7_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v32, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v18, v14 -; GFX940-NEXT: v_mov_b32_e32 v19, v15 -; GFX940-NEXT: global_store_dwordx4 v32, v[18:21], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__7_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v32, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v18, v14 +; GFX942-NEXT: v_mov_b32_e32 v19, v15 +; GFX942-NEXT: global_store_dwordx4 v32, v[18:21], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -9561,18 +9561,18 @@ define void @v_shuffle_v2i64_v8i64__8_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__8_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__8_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -9603,16 +9603,16 @@ define void @v_shuffle_v2i64_v8i64__9_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__9_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__9_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -9647,18 +9647,18 @@ define void @v_shuffle_v2i64_v8i64__10_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__10_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__10_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -9693,18 +9693,18 @@ define void @v_shuffle_v2i64_v8i64__11_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__11_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v4 -; GFX940-NEXT: v_mov_b32_e32 v9, v5 -; GFX940-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__11_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v4 +; GFX942-NEXT: v_mov_b32_e32 v9, v5 +; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -9739,18 +9739,18 @@ define void @v_shuffle_v2i64_v8i64__12_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__12_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v4 -; GFX940-NEXT: v_mov_b32_e32 v11, v5 -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__12_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v4 +; GFX942-NEXT: v_mov_b32_e32 v11, v5 +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -9785,18 +9785,18 @@ define void @v_shuffle_v2i64_v8i64__13_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__13_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v4 -; GFX940-NEXT: v_mov_b32_e32 v13, v5 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__13_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v4 +; GFX942-NEXT: v_mov_b32_e32 v13, v5 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -9831,18 +9831,18 @@ define void @v_shuffle_v2i64_v8i64__14_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__14_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v14, v4 -; GFX940-NEXT: v_mov_b32_e32 v15, v5 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__14_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v14, v4 +; GFX942-NEXT: v_mov_b32_e32 v15, v5 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -9873,16 +9873,16 @@ define void @v_shuffle_v2i64_v8i64__u_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__u_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__u_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -9923,22 +9923,22 @@ define void @v_shuffle_v2i64_v8i64__0_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__0_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v18, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__0_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v18, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -9979,22 +9979,22 @@ define void @v_shuffle_v2i64_v8i64__1_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__1_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v20, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v10 -; GFX940-NEXT: v_mov_b32_e32 v5, v11 -; GFX940-NEXT: global_store_dwordx4 v20, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__1_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v20, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v10 +; GFX942-NEXT: v_mov_b32_e32 v5, v11 +; GFX942-NEXT: global_store_dwordx4 v20, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -10035,22 +10035,22 @@ define void @v_shuffle_v2i64_v8i64__2_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__2_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v22, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:21] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v12 -; GFX940-NEXT: v_mov_b32_e32 v7, v13 -; GFX940-NEXT: global_store_dwordx4 v22, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__2_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v22, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:21] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v12 +; GFX942-NEXT: v_mov_b32_e32 v7, v13 +; GFX942-NEXT: global_store_dwordx4 v22, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -10091,22 +10091,22 @@ define void @v_shuffle_v2i64_v8i64__3_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__3_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v24, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v14 -; GFX940-NEXT: v_mov_b32_e32 v9, v15 -; GFX940-NEXT: global_store_dwordx4 v24, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__3_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v24, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v14 +; GFX942-NEXT: v_mov_b32_e32 v9, v15 +; GFX942-NEXT: global_store_dwordx4 v24, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -10147,22 +10147,22 @@ define void @v_shuffle_v2i64_v8i64__4_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__4_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v26, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[10:25] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v16 -; GFX940-NEXT: v_mov_b32_e32 v11, v17 -; GFX940-NEXT: global_store_dwordx4 v26, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__4_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v26, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[10:25] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v16 +; GFX942-NEXT: v_mov_b32_e32 v11, v17 +; GFX942-NEXT: global_store_dwordx4 v26, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -10203,22 +10203,22 @@ define void @v_shuffle_v2i64_v8i64__5_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__5_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v28, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v18 -; GFX940-NEXT: v_mov_b32_e32 v13, v19 -; GFX940-NEXT: global_store_dwordx4 v28, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__5_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v28, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v18 +; GFX942-NEXT: v_mov_b32_e32 v13, v19 +; GFX942-NEXT: global_store_dwordx4 v28, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -10259,22 +10259,22 @@ define void @v_shuffle_v2i64_v8i64__6_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__6_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v30, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[14:29] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v14, v20 -; GFX940-NEXT: v_mov_b32_e32 v15, v21 -; GFX940-NEXT: global_store_dwordx4 v30, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__6_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v30, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[14:29] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v14, v20 +; GFX942-NEXT: v_mov_b32_e32 v15, v21 +; GFX942-NEXT: global_store_dwordx4 v30, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -10315,22 +10315,22 @@ define void @v_shuffle_v2i64_v8i64__7_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__7_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v32, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v20, v14 -; GFX940-NEXT: v_mov_b32_e32 v21, v15 -; GFX940-NEXT: global_store_dwordx4 v32, v[20:23], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__7_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v32, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v20, v14 +; GFX942-NEXT: v_mov_b32_e32 v21, v15 +; GFX942-NEXT: global_store_dwordx4 v32, v[20:23], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -10365,18 +10365,18 @@ define void @v_shuffle_v2i64_v8i64__8_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__8_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__8_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -10411,18 +10411,18 @@ define void @v_shuffle_v2i64_v8i64__9_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__9_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__9_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -10453,16 +10453,16 @@ define void @v_shuffle_v2i64_v8i64__10_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__10_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__10_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -10497,18 +10497,18 @@ define void @v_shuffle_v2i64_v8i64__11_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__11_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v6 -; GFX940-NEXT: v_mov_b32_e32 v9, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__11_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v6 +; GFX942-NEXT: v_mov_b32_e32 v9, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -10543,18 +10543,18 @@ define void @v_shuffle_v2i64_v8i64__12_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__12_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v6 -; GFX940-NEXT: v_mov_b32_e32 v11, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__12_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v6 +; GFX942-NEXT: v_mov_b32_e32 v11, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -10589,18 +10589,18 @@ define void @v_shuffle_v2i64_v8i64__13_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__13_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v6 -; GFX940-NEXT: v_mov_b32_e32 v13, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__13_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v6 +; GFX942-NEXT: v_mov_b32_e32 v13, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -10635,18 +10635,18 @@ define void @v_shuffle_v2i64_v8i64__14_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__14_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v14, v6 -; GFX940-NEXT: v_mov_b32_e32 v15, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__14_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v14, v6 +; GFX942-NEXT: v_mov_b32_e32 v15, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -10677,16 +10677,16 @@ define void @v_shuffle_v2i64_v8i64__u_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__u_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__u_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -10727,22 +10727,22 @@ define void @v_shuffle_v2i64_v8i64__0_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__0_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v18, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__0_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v18, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -10783,22 +10783,22 @@ define void @v_shuffle_v2i64_v8i64__1_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__1_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v20, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v12 -; GFX940-NEXT: v_mov_b32_e32 v5, v13 -; GFX940-NEXT: global_store_dwordx4 v20, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__1_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v20, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v12 +; GFX942-NEXT: v_mov_b32_e32 v5, v13 +; GFX942-NEXT: global_store_dwordx4 v20, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -10839,22 +10839,22 @@ define void @v_shuffle_v2i64_v8i64__2_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__2_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v22, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:21] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v14 -; GFX940-NEXT: v_mov_b32_e32 v7, v15 -; GFX940-NEXT: global_store_dwordx4 v22, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__2_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v22, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:21] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v14 +; GFX942-NEXT: v_mov_b32_e32 v7, v15 +; GFX942-NEXT: global_store_dwordx4 v22, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -10895,22 +10895,22 @@ define void @v_shuffle_v2i64_v8i64__3_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__3_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v24, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v16 -; GFX940-NEXT: v_mov_b32_e32 v9, v17 -; GFX940-NEXT: global_store_dwordx4 v24, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__3_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v24, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v16 +; GFX942-NEXT: v_mov_b32_e32 v9, v17 +; GFX942-NEXT: global_store_dwordx4 v24, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -10951,22 +10951,22 @@ define void @v_shuffle_v2i64_v8i64__4_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__4_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v26, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[10:25] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v18 -; GFX940-NEXT: v_mov_b32_e32 v11, v19 -; GFX940-NEXT: global_store_dwordx4 v26, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__4_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v26, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[10:25] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v18 +; GFX942-NEXT: v_mov_b32_e32 v11, v19 +; GFX942-NEXT: global_store_dwordx4 v26, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -11007,22 +11007,22 @@ define void @v_shuffle_v2i64_v8i64__5_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__5_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v28, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v20 -; GFX940-NEXT: v_mov_b32_e32 v13, v21 -; GFX940-NEXT: global_store_dwordx4 v28, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__5_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v28, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v20 +; GFX942-NEXT: v_mov_b32_e32 v13, v21 +; GFX942-NEXT: global_store_dwordx4 v28, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -11063,22 +11063,22 @@ define void @v_shuffle_v2i64_v8i64__6_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__6_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v30, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[14:29] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v14, v22 -; GFX940-NEXT: v_mov_b32_e32 v15, v23 -; GFX940-NEXT: global_store_dwordx4 v30, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__6_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v30, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[14:29] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v14, v22 +; GFX942-NEXT: v_mov_b32_e32 v15, v23 +; GFX942-NEXT: global_store_dwordx4 v30, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -11119,22 +11119,22 @@ define void @v_shuffle_v2i64_v8i64__7_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__7_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v32, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v22, v14 -; GFX940-NEXT: v_mov_b32_e32 v23, v15 -; GFX940-NEXT: global_store_dwordx4 v32, v[22:25], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__7_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v32, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v22, v14 +; GFX942-NEXT: v_mov_b32_e32 v23, v15 +; GFX942-NEXT: global_store_dwordx4 v32, v[22:25], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -11169,18 +11169,18 @@ define void @v_shuffle_v2i64_v8i64__8_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__8_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__8_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -11215,18 +11215,18 @@ define void @v_shuffle_v2i64_v8i64__9_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__9_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v8 -; GFX940-NEXT: v_mov_b32_e32 v5, v9 -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__9_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v8 +; GFX942-NEXT: v_mov_b32_e32 v5, v9 +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -11261,18 +11261,18 @@ define void @v_shuffle_v2i64_v8i64__10_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__10_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__10_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -11303,16 +11303,16 @@ define void @v_shuffle_v2i64_v8i64__11_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__11_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__11_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -11347,18 +11347,18 @@ define void @v_shuffle_v2i64_v8i64__12_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__12_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v8 -; GFX940-NEXT: v_mov_b32_e32 v11, v9 -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__12_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v8 +; GFX942-NEXT: v_mov_b32_e32 v11, v9 +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -11393,18 +11393,18 @@ define void @v_shuffle_v2i64_v8i64__13_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__13_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v8 -; GFX940-NEXT: v_mov_b32_e32 v13, v9 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__13_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v8 +; GFX942-NEXT: v_mov_b32_e32 v13, v9 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -11439,18 +11439,18 @@ define void @v_shuffle_v2i64_v8i64__14_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__14_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v14, v8 -; GFX940-NEXT: v_mov_b32_e32 v15, v9 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__14_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v14, v8 +; GFX942-NEXT: v_mov_b32_e32 v15, v9 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -11481,16 +11481,16 @@ define void @v_shuffle_v2i64_v8i64__u_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__u_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__u_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -11531,22 +11531,22 @@ define void @v_shuffle_v2i64_v8i64__0_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__0_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v18, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__0_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v18, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -11587,22 +11587,22 @@ define void @v_shuffle_v2i64_v8i64__1_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__1_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v20, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v20, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__1_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v20, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v20, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -11643,22 +11643,22 @@ define void @v_shuffle_v2i64_v8i64__2_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__2_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v22, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:21] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v16 -; GFX940-NEXT: v_mov_b32_e32 v7, v17 -; GFX940-NEXT: global_store_dwordx4 v22, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__2_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v22, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:21] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v16 +; GFX942-NEXT: v_mov_b32_e32 v7, v17 +; GFX942-NEXT: global_store_dwordx4 v22, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -11699,22 +11699,22 @@ define void @v_shuffle_v2i64_v8i64__3_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__3_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v24, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v18 -; GFX940-NEXT: v_mov_b32_e32 v9, v19 -; GFX940-NEXT: global_store_dwordx4 v24, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__3_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v24, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v18 +; GFX942-NEXT: v_mov_b32_e32 v9, v19 +; GFX942-NEXT: global_store_dwordx4 v24, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -11755,22 +11755,22 @@ define void @v_shuffle_v2i64_v8i64__4_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__4_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v26, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[10:25] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v20 -; GFX940-NEXT: v_mov_b32_e32 v11, v21 -; GFX940-NEXT: global_store_dwordx4 v26, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__4_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v26, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[10:25] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v20 +; GFX942-NEXT: v_mov_b32_e32 v11, v21 +; GFX942-NEXT: global_store_dwordx4 v26, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -11811,22 +11811,22 @@ define void @v_shuffle_v2i64_v8i64__5_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__5_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v28, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v22 -; GFX940-NEXT: v_mov_b32_e32 v13, v23 -; GFX940-NEXT: global_store_dwordx4 v28, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__5_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v28, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v22 +; GFX942-NEXT: v_mov_b32_e32 v13, v23 +; GFX942-NEXT: global_store_dwordx4 v28, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -11867,22 +11867,22 @@ define void @v_shuffle_v2i64_v8i64__6_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__6_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v30, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[14:29] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v14, v24 -; GFX940-NEXT: v_mov_b32_e32 v15, v25 -; GFX940-NEXT: global_store_dwordx4 v30, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__6_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v30, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[14:29] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v14, v24 +; GFX942-NEXT: v_mov_b32_e32 v15, v25 +; GFX942-NEXT: global_store_dwordx4 v30, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -11923,22 +11923,22 @@ define void @v_shuffle_v2i64_v8i64__7_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__7_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v32, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v24, v14 -; GFX940-NEXT: v_mov_b32_e32 v25, v15 -; GFX940-NEXT: global_store_dwordx4 v32, v[24:27], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__7_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v32, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v24, v14 +; GFX942-NEXT: v_mov_b32_e32 v25, v15 +; GFX942-NEXT: global_store_dwordx4 v32, v[24:27], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -11973,18 +11973,18 @@ define void @v_shuffle_v2i64_v8i64__8_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__8_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__8_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -12019,18 +12019,18 @@ define void @v_shuffle_v2i64_v8i64__9_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__9_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v10 -; GFX940-NEXT: v_mov_b32_e32 v5, v11 -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__9_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v10 +; GFX942-NEXT: v_mov_b32_e32 v5, v11 +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -12065,18 +12065,18 @@ define void @v_shuffle_v2i64_v8i64__10_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__10_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v10 -; GFX940-NEXT: v_mov_b32_e32 v7, v11 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__10_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v10 +; GFX942-NEXT: v_mov_b32_e32 v7, v11 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -12111,18 +12111,18 @@ define void @v_shuffle_v2i64_v8i64__11_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__11_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__11_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -12153,16 +12153,16 @@ define void @v_shuffle_v2i64_v8i64__12_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__12_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__12_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -12197,18 +12197,18 @@ define void @v_shuffle_v2i64_v8i64__13_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__13_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v10 -; GFX940-NEXT: v_mov_b32_e32 v13, v11 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__13_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v10 +; GFX942-NEXT: v_mov_b32_e32 v13, v11 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -12243,18 +12243,18 @@ define void @v_shuffle_v2i64_v8i64__14_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__14_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v14, v10 -; GFX940-NEXT: v_mov_b32_e32 v15, v11 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__14_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v14, v10 +; GFX942-NEXT: v_mov_b32_e32 v15, v11 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -12285,16 +12285,16 @@ define void @v_shuffle_v2i64_v8i64__u_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__u_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__u_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -12335,22 +12335,22 @@ define void @v_shuffle_v2i64_v8i64__0_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__0_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v18, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v14 -; GFX940-NEXT: v_mov_b32_e32 v3, v15 -; GFX940-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__0_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v18, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v14 +; GFX942-NEXT: v_mov_b32_e32 v3, v15 +; GFX942-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -12391,22 +12391,22 @@ define void @v_shuffle_v2i64_v8i64__1_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__1_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v20, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v16 -; GFX940-NEXT: v_mov_b32_e32 v5, v17 -; GFX940-NEXT: global_store_dwordx4 v20, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__1_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v20, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v16 +; GFX942-NEXT: v_mov_b32_e32 v5, v17 +; GFX942-NEXT: global_store_dwordx4 v20, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -12447,22 +12447,22 @@ define void @v_shuffle_v2i64_v8i64__2_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__2_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v22, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:21] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v18 -; GFX940-NEXT: v_mov_b32_e32 v7, v19 -; GFX940-NEXT: global_store_dwordx4 v22, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__2_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v22, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:21] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v18 +; GFX942-NEXT: v_mov_b32_e32 v7, v19 +; GFX942-NEXT: global_store_dwordx4 v22, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -12503,22 +12503,22 @@ define void @v_shuffle_v2i64_v8i64__3_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__3_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v24, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v20 -; GFX940-NEXT: v_mov_b32_e32 v9, v21 -; GFX940-NEXT: global_store_dwordx4 v24, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__3_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v24, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v20 +; GFX942-NEXT: v_mov_b32_e32 v9, v21 +; GFX942-NEXT: global_store_dwordx4 v24, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -12559,22 +12559,22 @@ define void @v_shuffle_v2i64_v8i64__4_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__4_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v26, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[10:25] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v22 -; GFX940-NEXT: v_mov_b32_e32 v11, v23 -; GFX940-NEXT: global_store_dwordx4 v26, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__4_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v26, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[10:25] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v22 +; GFX942-NEXT: v_mov_b32_e32 v11, v23 +; GFX942-NEXT: global_store_dwordx4 v26, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -12615,22 +12615,22 @@ define void @v_shuffle_v2i64_v8i64__5_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__5_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v28, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v24 -; GFX940-NEXT: v_mov_b32_e32 v13, v25 -; GFX940-NEXT: global_store_dwordx4 v28, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__5_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v28, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v24 +; GFX942-NEXT: v_mov_b32_e32 v13, v25 +; GFX942-NEXT: global_store_dwordx4 v28, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -12671,22 +12671,22 @@ define void @v_shuffle_v2i64_v8i64__6_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__6_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v30, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[14:29] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v14, v26 -; GFX940-NEXT: v_mov_b32_e32 v15, v27 -; GFX940-NEXT: global_store_dwordx4 v30, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__6_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v30, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[14:29] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v14, v26 +; GFX942-NEXT: v_mov_b32_e32 v15, v27 +; GFX942-NEXT: global_store_dwordx4 v30, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -12727,22 +12727,22 @@ define void @v_shuffle_v2i64_v8i64__7_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__7_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v32, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v26, v14 -; GFX940-NEXT: v_mov_b32_e32 v27, v15 -; GFX940-NEXT: global_store_dwordx4 v32, v[26:29], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__7_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v32, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v26, v14 +; GFX942-NEXT: v_mov_b32_e32 v27, v15 +; GFX942-NEXT: global_store_dwordx4 v32, v[26:29], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -12777,18 +12777,18 @@ define void @v_shuffle_v2i64_v8i64__8_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__8_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__8_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -12823,18 +12823,18 @@ define void @v_shuffle_v2i64_v8i64__9_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__9_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v12 -; GFX940-NEXT: v_mov_b32_e32 v5, v13 -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__9_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v12 +; GFX942-NEXT: v_mov_b32_e32 v5, v13 +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -12869,18 +12869,18 @@ define void @v_shuffle_v2i64_v8i64__10_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__10_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v12 -; GFX940-NEXT: v_mov_b32_e32 v7, v13 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__10_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v12 +; GFX942-NEXT: v_mov_b32_e32 v7, v13 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -12915,18 +12915,18 @@ define void @v_shuffle_v2i64_v8i64__11_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__11_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v12 -; GFX940-NEXT: v_mov_b32_e32 v9, v13 -; GFX940-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__11_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v12 +; GFX942-NEXT: v_mov_b32_e32 v9, v13 +; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -12961,18 +12961,18 @@ define void @v_shuffle_v2i64_v8i64__12_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__12_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__12_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -13003,16 +13003,16 @@ define void @v_shuffle_v2i64_v8i64__13_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__13_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__13_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -13047,18 +13047,18 @@ define void @v_shuffle_v2i64_v8i64__14_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__14_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v14, v12 -; GFX940-NEXT: v_mov_b32_e32 v15, v13 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__14_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v14, v12 +; GFX942-NEXT: v_mov_b32_e32 v15, v13 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -13089,16 +13089,16 @@ define void @v_shuffle_v2i64_v8i64__u_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__u_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__u_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -13139,22 +13139,22 @@ define void @v_shuffle_v2i64_v8i64__0_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__0_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v18, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v16 -; GFX940-NEXT: v_mov_b32_e32 v3, v17 -; GFX940-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__0_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v18, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v16 +; GFX942-NEXT: v_mov_b32_e32 v3, v17 +; GFX942-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -13195,22 +13195,22 @@ define void @v_shuffle_v2i64_v8i64__1_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__1_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v20, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v18 -; GFX940-NEXT: v_mov_b32_e32 v5, v19 -; GFX940-NEXT: global_store_dwordx4 v20, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__1_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v20, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v18 +; GFX942-NEXT: v_mov_b32_e32 v5, v19 +; GFX942-NEXT: global_store_dwordx4 v20, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -13251,22 +13251,22 @@ define void @v_shuffle_v2i64_v8i64__2_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__2_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v22, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:21] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v20 -; GFX940-NEXT: v_mov_b32_e32 v7, v21 -; GFX940-NEXT: global_store_dwordx4 v22, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__2_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v22, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:21] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v20 +; GFX942-NEXT: v_mov_b32_e32 v7, v21 +; GFX942-NEXT: global_store_dwordx4 v22, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -13307,22 +13307,22 @@ define void @v_shuffle_v2i64_v8i64__3_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__3_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v24, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v22 -; GFX940-NEXT: v_mov_b32_e32 v9, v23 -; GFX940-NEXT: global_store_dwordx4 v24, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__3_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v24, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v22 +; GFX942-NEXT: v_mov_b32_e32 v9, v23 +; GFX942-NEXT: global_store_dwordx4 v24, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -13363,22 +13363,22 @@ define void @v_shuffle_v2i64_v8i64__4_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__4_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v26, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[10:25] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v24 -; GFX940-NEXT: v_mov_b32_e32 v11, v25 -; GFX940-NEXT: global_store_dwordx4 v26, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__4_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v26, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[10:25] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v24 +; GFX942-NEXT: v_mov_b32_e32 v11, v25 +; GFX942-NEXT: global_store_dwordx4 v26, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -13419,22 +13419,22 @@ define void @v_shuffle_v2i64_v8i64__5_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__5_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v28, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v26 -; GFX940-NEXT: v_mov_b32_e32 v13, v27 -; GFX940-NEXT: global_store_dwordx4 v28, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__5_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v28, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v26 +; GFX942-NEXT: v_mov_b32_e32 v13, v27 +; GFX942-NEXT: global_store_dwordx4 v28, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -13475,22 +13475,22 @@ define void @v_shuffle_v2i64_v8i64__6_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__6_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v30, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[14:29] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v14, v28 -; GFX940-NEXT: v_mov_b32_e32 v15, v29 -; GFX940-NEXT: global_store_dwordx4 v30, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__6_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v30, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[14:29] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v14, v28 +; GFX942-NEXT: v_mov_b32_e32 v15, v29 +; GFX942-NEXT: global_store_dwordx4 v30, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -13531,22 +13531,22 @@ define void @v_shuffle_v2i64_v8i64__7_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__7_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v32, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v28, v14 -; GFX940-NEXT: v_mov_b32_e32 v29, v15 -; GFX940-NEXT: global_store_dwordx4 v32, v[28:31], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__7_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v32, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v28, v14 +; GFX942-NEXT: v_mov_b32_e32 v29, v15 +; GFX942-NEXT: global_store_dwordx4 v32, v[28:31], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -13581,18 +13581,18 @@ define void @v_shuffle_v2i64_v8i64__8_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__8_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v14 -; GFX940-NEXT: v_mov_b32_e32 v3, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__8_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v14 +; GFX942-NEXT: v_mov_b32_e32 v3, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -13627,18 +13627,18 @@ define void @v_shuffle_v2i64_v8i64__9_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__9_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__9_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -13673,18 +13673,18 @@ define void @v_shuffle_v2i64_v8i64__10_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__10_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v14 -; GFX940-NEXT: v_mov_b32_e32 v7, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__10_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v14 +; GFX942-NEXT: v_mov_b32_e32 v7, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -13719,18 +13719,18 @@ define void @v_shuffle_v2i64_v8i64__11_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__11_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v14 -; GFX940-NEXT: v_mov_b32_e32 v9, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__11_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v14 +; GFX942-NEXT: v_mov_b32_e32 v9, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -13765,18 +13765,18 @@ define void @v_shuffle_v2i64_v8i64__12_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__12_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v14 -; GFX940-NEXT: v_mov_b32_e32 v11, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__12_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v14 +; GFX942-NEXT: v_mov_b32_e32 v11, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -13811,18 +13811,18 @@ define void @v_shuffle_v2i64_v8i64__13_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__13_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__13_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -13853,16 +13853,16 @@ define void @v_shuffle_v2i64_v8i64__14_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2i64_v8i64__14_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2i64_v8i64__14_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=v"() %vec1 = call <8 x i64> asm "; def $0", "=v"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -13907,17 +13907,17 @@ define void @s_shuffle_v2i64_v8i64__0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -13951,18 +13951,18 @@ define void @s_shuffle_v2i64_v8i64__1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -13992,17 +13992,17 @@ define void @s_shuffle_v2i64_v8i64__2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -14036,18 +14036,18 @@ define void @s_shuffle_v2i64_v8i64__3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -14081,17 +14081,17 @@ define void @s_shuffle_v2i64_v8i64__4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -14125,18 +14125,18 @@ define void @s_shuffle_v2i64_v8i64__5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -14170,18 +14170,18 @@ define void @s_shuffle_v2i64_v8i64__6_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -14215,18 +14215,18 @@ define void @s_shuffle_v2i64_v8i64__7_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -14274,18 +14274,18 @@ define void @s_shuffle_v2i64_v8i64__9_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__9_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__9_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -14316,17 +14316,17 @@ define void @s_shuffle_v2i64_v8i64__10_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__10_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__10_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -14361,18 +14361,18 @@ define void @s_shuffle_v2i64_v8i64__11_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__11_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__11_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -14407,17 +14407,17 @@ define void @s_shuffle_v2i64_v8i64__12_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__12_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__12_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -14452,18 +14452,18 @@ define void @s_shuffle_v2i64_v8i64__13_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__13_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__13_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -14498,18 +14498,18 @@ define void @s_shuffle_v2i64_v8i64__14_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__14_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__14_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -14544,18 +14544,18 @@ define void @s_shuffle_v2i64_v8i64__15_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__15_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__15_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -14600,24 +14600,24 @@ define void @s_shuffle_v2i64_v8i64__15_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__15_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s18 -; GFX940-NEXT: s_mov_b32 s9, s19 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__15_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -14658,22 +14658,22 @@ define void @s_shuffle_v2i64_v8i64__15_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__15_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s26 -; GFX940-NEXT: s_mov_b32 s9, s27 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__15_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s26 +; GFX942-NEXT: s_mov_b32 s9, s27 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -14740,24 +14740,24 @@ define void @s_shuffle_v2i64_v8i64__15_2() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__15_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s22 -; GFX940-NEXT: s_mov_b32 s9, s23 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__15_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s22 +; GFX942-NEXT: s_mov_b32 s9, s23 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -14798,22 +14798,22 @@ define void @s_shuffle_v2i64_v8i64__15_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__15_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s26 -; GFX940-NEXT: s_mov_b32 s9, s27 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__15_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s26 +; GFX942-NEXT: s_mov_b32 s9, s27 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -14880,34 +14880,34 @@ define void @s_shuffle_v2i64_v8i64__15_4() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__15_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_store_dword off, v0, s32 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: v_writelane_b32 v0, s30, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_writelane_b32 v0, s31, 1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s30 -; GFX940-NEXT: s_mov_b32 s9, s31 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s31, v0, 1 -; GFX940-NEXT: v_readlane_b32 s30, v0, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__15_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s30 +; GFX942-NEXT: s_mov_b32 s9, s31 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s31, v0, 1 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -14974,22 +14974,22 @@ define void @s_shuffle_v2i64_v8i64__15_5() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__15_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s26 -; GFX940-NEXT: s_mov_b32 s9, s27 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__15_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s26 +; GFX942-NEXT: s_mov_b32 s9, s27 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -15112,34 +15112,34 @@ define void @s_shuffle_v2i64_v8i64__15_6() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__15_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_store_dword off, v0, s32 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: v_writelane_b32 v0, s30, 0 -; GFX940-NEXT: v_writelane_b32 v0, s31, 1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s30 -; GFX940-NEXT: s_mov_b32 s9, s31 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s31, v0, 1 -; GFX940-NEXT: v_readlane_b32 s30, v0, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__15_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s30 +; GFX942-NEXT: s_mov_b32 s9, s31 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s31, v0, 1 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -15262,34 +15262,34 @@ define void @s_shuffle_v2i64_v8i64__15_7() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__15_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_store_dword off, v0, s32 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: v_writelane_b32 v0, s30, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_writelane_b32 v0, s31, 1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s30 -; GFX940-NEXT: s_mov_b32 s13, s31 -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s31, v0, 1 -; GFX940-NEXT: v_readlane_b32 s30, v0, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__15_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s30 +; GFX942-NEXT: s_mov_b32 s13, s31 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s31, v0, 1 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -15328,20 +15328,20 @@ define void @s_shuffle_v2i64_v8i64__15_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__15_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__15_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -15400,20 +15400,20 @@ define void @s_shuffle_v2i64_v8i64__15_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__15_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__15_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -15494,18 +15494,18 @@ define void @s_shuffle_v2i64_v8i64__15_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__15_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__15_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -15544,20 +15544,20 @@ define void @s_shuffle_v2i64_v8i64__15_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__15_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__15_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -15596,20 +15596,20 @@ define void @s_shuffle_v2i64_v8i64__15_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__15_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__15_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -15644,18 +15644,18 @@ define void @s_shuffle_v2i64_v8i64__u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -15712,20 +15712,20 @@ define void @s_shuffle_v2i64_v8i64__1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -15782,20 +15782,20 @@ define void @s_shuffle_v2i64_v8i64__3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -15833,18 +15833,18 @@ define void @s_shuffle_v2i64_v8i64__4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -15882,20 +15882,20 @@ define void @s_shuffle_v2i64_v8i64__5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -15933,20 +15933,20 @@ define void @s_shuffle_v2i64_v8i64__6_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -15984,20 +15984,20 @@ define void @s_shuffle_v2i64_v8i64__7_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -16031,18 +16031,18 @@ define void @s_shuffle_v2i64_v8i64__8_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__8_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__8_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -16086,24 +16086,24 @@ define void @s_shuffle_v2i64_v8i64__9_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__9_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__9_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -16222,22 +16222,22 @@ define void @s_shuffle_v2i64_v8i64__10_0() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__10_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__10_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -16282,24 +16282,24 @@ define void @s_shuffle_v2i64_v8i64__11_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__11_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__11_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -16344,32 +16344,32 @@ define void @s_shuffle_v2i64_v8i64__12_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__12_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_store_dword off, v0, s32 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: v_writelane_b32 v0, s30, 0 -; GFX940-NEXT: v_writelane_b32 v0, s31, 1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s16 -; GFX940-NEXT: s_mov_b32 s11, s17 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s31, v0, 1 -; GFX940-NEXT: v_readlane_b32 s30, v0, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__12_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s16 +; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s31, v0, 1 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -16414,24 +16414,24 @@ define void @s_shuffle_v2i64_v8i64__13_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__13_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__13_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -16476,24 +16476,24 @@ define void @s_shuffle_v2i64_v8i64__14_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__14_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s18, s0 -; GFX940-NEXT: s_mov_b32 s19, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[16:17] -; GFX940-NEXT: s_mov_b64 s[10:11], s[18:19] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__14_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s18, s0 +; GFX942-NEXT: s_mov_b32 s19, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17] +; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -16524,17 +16524,17 @@ define void @s_shuffle_v2i64_v8i64__u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -16564,17 +16564,17 @@ define void @s_shuffle_v2i64_v8i64__0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -16669,18 +16669,18 @@ define void @s_shuffle_v2i64_v8i64__4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -16737,20 +16737,20 @@ define void @s_shuffle_v2i64_v8i64__6_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -16799,17 +16799,17 @@ define void @s_shuffle_v2i64_v8i64__8_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__8_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__8_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -16849,22 +16849,22 @@ define void @s_shuffle_v2i64_v8i64__9_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__9_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__9_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -16983,22 +16983,22 @@ define void @s_shuffle_v2i64_v8i64__10_1() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__10_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__10_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -17039,22 +17039,22 @@ define void @s_shuffle_v2i64_v8i64__11_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__11_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s18 -; GFX940-NEXT: s_mov_b32 s9, s19 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__11_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -17099,32 +17099,32 @@ define void @s_shuffle_v2i64_v8i64__12_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__12_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_store_dword off, v0, s32 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: v_writelane_b32 v0, s30, 0 -; GFX940-NEXT: v_writelane_b32 v0, s31, 1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s18 -; GFX940-NEXT: s_mov_b32 s11, s19 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s31, v0, 1 -; GFX940-NEXT: v_readlane_b32 s30, v0, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__12_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s18 +; GFX942-NEXT: s_mov_b32 s11, s19 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s31, v0, 1 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -17165,22 +17165,22 @@ define void @s_shuffle_v2i64_v8i64__13_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__13_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s22 -; GFX940-NEXT: s_mov_b32 s9, s23 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__13_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s22 +; GFX942-NEXT: s_mov_b32 s9, s23 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -17225,24 +17225,24 @@ define void @s_shuffle_v2i64_v8i64__14_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__14_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s18, s2 -; GFX940-NEXT: s_mov_b32 s19, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[16:17] -; GFX940-NEXT: s_mov_b64 s[10:11], s[18:19] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__14_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s18, s2 +; GFX942-NEXT: s_mov_b32 s19, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17] +; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -17277,18 +17277,18 @@ define void @s_shuffle_v2i64_v8i64__u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -17345,20 +17345,20 @@ define void @s_shuffle_v2i64_v8i64__1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -17415,20 +17415,20 @@ define void @s_shuffle_v2i64_v8i64__3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -17466,18 +17466,18 @@ define void @s_shuffle_v2i64_v8i64__4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -17515,20 +17515,20 @@ define void @s_shuffle_v2i64_v8i64__5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -17566,20 +17566,20 @@ define void @s_shuffle_v2i64_v8i64__6_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -17617,20 +17617,20 @@ define void @s_shuffle_v2i64_v8i64__7_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -17664,18 +17664,18 @@ define void @s_shuffle_v2i64_v8i64__8_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__8_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__8_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -17741,24 +17741,24 @@ define void @s_shuffle_v2i64_v8i64__9_2() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__9_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__9_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -17821,33 +17821,33 @@ define void @s_shuffle_v2i64_v8i64__10_2() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__10_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_store_dword off, v0, s32 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: v_writelane_b32 v0, s30, 0 -; GFX940-NEXT: v_writelane_b32 v0, s31, 1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s20 -; GFX940-NEXT: s_mov_b32 s11, s21 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s31, v0, 1 -; GFX940-NEXT: v_readlane_b32 s30, v0, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__10_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s20 +; GFX942-NEXT: s_mov_b32 s11, s21 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s31, v0, 1 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -17914,24 +17914,24 @@ define void @s_shuffle_v2i64_v8i64__11_2() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__11_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__11_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -17976,22 +17976,22 @@ define void @s_shuffle_v2i64_v8i64__12_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__12_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s16 -; GFX940-NEXT: s_mov_b32 s11, s17 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__12_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s16 +; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -18058,24 +18058,24 @@ define void @s_shuffle_v2i64_v8i64__13_2() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__13_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s18 -; GFX940-NEXT: s_mov_b32 s9, s19 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__13_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -18120,24 +18120,24 @@ define void @s_shuffle_v2i64_v8i64__14_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__14_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s22, s4 -; GFX940-NEXT: s_mov_b32 s23, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[20:21] -; GFX940-NEXT: s_mov_b64 s[10:11], s[22:23] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__14_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s22, s4 +; GFX942-NEXT: s_mov_b32 s23, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[20:21] +; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -18168,17 +18168,17 @@ define void @s_shuffle_v2i64_v8i64__u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -18246,17 +18246,17 @@ define void @s_shuffle_v2i64_v8i64__2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -18313,18 +18313,18 @@ define void @s_shuffle_v2i64_v8i64__4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -18381,20 +18381,20 @@ define void @s_shuffle_v2i64_v8i64__6_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s6 -; GFX940-NEXT: s_mov_b32 s15, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s6 +; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -18443,17 +18443,17 @@ define void @s_shuffle_v2i64_v8i64__8_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__8_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__8_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -18493,22 +18493,22 @@ define void @s_shuffle_v2i64_v8i64__9_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__9_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__9_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -18571,33 +18571,33 @@ define void @s_shuffle_v2i64_v8i64__10_3() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__10_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_store_dword off, v0, s32 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: v_writelane_b32 v0, s30, 0 -; GFX940-NEXT: v_writelane_b32 v0, s31, 1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s22 -; GFX940-NEXT: s_mov_b32 s11, s23 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s31, v0, 1 -; GFX940-NEXT: v_readlane_b32 s30, v0, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__10_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s22 +; GFX942-NEXT: s_mov_b32 s11, s23 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s31, v0, 1 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -18638,22 +18638,22 @@ define void @s_shuffle_v2i64_v8i64__11_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__11_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s18 -; GFX940-NEXT: s_mov_b32 s9, s19 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__11_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -18698,22 +18698,22 @@ define void @s_shuffle_v2i64_v8i64__12_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__12_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s18 -; GFX940-NEXT: s_mov_b32 s11, s19 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__12_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s18 +; GFX942-NEXT: s_mov_b32 s11, s19 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -18754,22 +18754,22 @@ define void @s_shuffle_v2i64_v8i64__13_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__13_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s22 -; GFX940-NEXT: s_mov_b32 s9, s23 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__13_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s22 +; GFX942-NEXT: s_mov_b32 s9, s23 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -18814,24 +18814,24 @@ define void @s_shuffle_v2i64_v8i64__14_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__14_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s22, s6 -; GFX940-NEXT: s_mov_b32 s23, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[20:21] -; GFX940-NEXT: s_mov_b64 s[10:11], s[22:23] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__14_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s22, s6 +; GFX942-NEXT: s_mov_b32 s23, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[20:21] +; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -18866,18 +18866,18 @@ define void @s_shuffle_v2i64_v8i64__u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s8 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s8 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -18995,18 +18995,18 @@ define void @s_shuffle_v2i64_v8i64__4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s8 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s8 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -19065,20 +19065,20 @@ define void @s_shuffle_v2i64_v8i64__6_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s8 -; GFX940-NEXT: s_mov_b32 s15, s9 -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s8 +; GFX942-NEXT: s_mov_b32 s15, s9 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -19133,18 +19133,18 @@ define void @s_shuffle_v2i64_v8i64__8_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__8_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s8 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__8_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s8 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -19210,34 +19210,34 @@ define void @s_shuffle_v2i64_v8i64__9_4() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__9_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_store_dword off, v0, s32 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: v_writelane_b32 v0, s30, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_writelane_b32 v0, s31, 1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s18 -; GFX940-NEXT: s_mov_b32 s9, s19 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s31, v0, 1 -; GFX940-NEXT: v_readlane_b32 s30, v0, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__9_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s31, v0, 1 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -19278,22 +19278,22 @@ define void @s_shuffle_v2i64_v8i64__10_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__10_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s20 -; GFX940-NEXT: s_mov_b32 s11, s21 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__10_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s20 +; GFX942-NEXT: s_mov_b32 s11, s21 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -19360,34 +19360,34 @@ define void @s_shuffle_v2i64_v8i64__11_4() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__11_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_store_dword off, v0, s32 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: v_writelane_b32 v0, s30, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_writelane_b32 v0, s31, 1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s22 -; GFX940-NEXT: s_mov_b32 s9, s23 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s31, v0, 1 -; GFX940-NEXT: v_readlane_b32 s30, v0, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__11_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s22 +; GFX942-NEXT: s_mov_b32 s9, s23 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s31, v0, 1 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -19454,22 +19454,22 @@ define void @s_shuffle_v2i64_v8i64__12_4() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__12_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s16 -; GFX940-NEXT: s_mov_b32 s11, s17 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__12_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s16 +; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -19536,34 +19536,34 @@ define void @s_shuffle_v2i64_v8i64__13_4() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__13_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_store_dword off, v0, s32 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: v_writelane_b32 v0, s30, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_writelane_b32 v0, s31, 1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s26 -; GFX940-NEXT: s_mov_b32 s9, s27 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s31, v0, 1 -; GFX940-NEXT: v_readlane_b32 s30, v0, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__13_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s26 +; GFX942-NEXT: s_mov_b32 s9, s27 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s31, v0, 1 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -19630,24 +19630,24 @@ define void @s_shuffle_v2i64_v8i64__14_4() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__14_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s26, s8 -; GFX940-NEXT: s_mov_b32 s27, s9 -; GFX940-NEXT: s_mov_b64 s[8:9], s[24:25] -; GFX940-NEXT: s_mov_b64 s[10:11], s[26:27] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__14_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s26, s8 +; GFX942-NEXT: s_mov_b32 s27, s9 +; GFX942-NEXT: s_mov_b64 s[8:9], s[24:25] +; GFX942-NEXT: s_mov_b64 s[10:11], s[26:27] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -19682,17 +19682,17 @@ define void @s_shuffle_v2i64_v8i64__u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -19749,18 +19749,18 @@ define void @s_shuffle_v2i64_v8i64__1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -19817,18 +19817,18 @@ define void @s_shuffle_v2i64_v8i64__3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -19862,17 +19862,17 @@ define void @s_shuffle_v2i64_v8i64__4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -19910,18 +19910,18 @@ define void @s_shuffle_v2i64_v8i64__5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -19959,20 +19959,20 @@ define void @s_shuffle_v2i64_v8i64__6_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s10 -; GFX940-NEXT: s_mov_b32 s15, s11 -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s10 +; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -20010,18 +20010,18 @@ define void @s_shuffle_v2i64_v8i64__7_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -20055,17 +20055,17 @@ define void @s_shuffle_v2i64_v8i64__8_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__8_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__8_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -20131,22 +20131,22 @@ define void @s_shuffle_v2i64_v8i64__9_5() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__9_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__9_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -20187,22 +20187,22 @@ define void @s_shuffle_v2i64_v8i64__10_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__10_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s22 -; GFX940-NEXT: s_mov_b32 s11, s23 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__10_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s22 +; GFX942-NEXT: s_mov_b32 s11, s23 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -20269,22 +20269,22 @@ define void @s_shuffle_v2i64_v8i64__11_5() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__11_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s18 -; GFX940-NEXT: s_mov_b32 s9, s19 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__11_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -20351,22 +20351,22 @@ define void @s_shuffle_v2i64_v8i64__12_5() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__12_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s18 -; GFX940-NEXT: s_mov_b32 s11, s19 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__12_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s18 +; GFX942-NEXT: s_mov_b32 s11, s19 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -20433,22 +20433,22 @@ define void @s_shuffle_v2i64_v8i64__13_5() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__13_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s22 -; GFX940-NEXT: s_mov_b32 s9, s23 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__13_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s22 +; GFX942-NEXT: s_mov_b32 s9, s23 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -20515,24 +20515,24 @@ define void @s_shuffle_v2i64_v8i64__14_5() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__14_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s26, s10 -; GFX940-NEXT: s_mov_b32 s27, s11 -; GFX940-NEXT: s_mov_b64 s[8:9], s[24:25] -; GFX940-NEXT: s_mov_b64 s[10:11], s[26:27] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__14_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s26, s10 +; GFX942-NEXT: s_mov_b32 s27, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[24:25] +; GFX942-NEXT: s_mov_b64 s[10:11], s[26:27] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -20567,18 +20567,18 @@ define void @s_shuffle_v2i64_v8i64__u_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -20635,20 +20635,20 @@ define void @s_shuffle_v2i64_v8i64__1_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -20705,20 +20705,20 @@ define void @s_shuffle_v2i64_v8i64__3_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -20756,18 +20756,18 @@ define void @s_shuffle_v2i64_v8i64__4_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -20805,20 +20805,20 @@ define void @s_shuffle_v2i64_v8i64__5_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -20856,20 +20856,20 @@ define void @s_shuffle_v2i64_v8i64__6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -20907,20 +20907,20 @@ define void @s_shuffle_v2i64_v8i64__7_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -20954,18 +20954,18 @@ define void @s_shuffle_v2i64_v8i64__8_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__8_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__8_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -21087,34 +21087,34 @@ define void @s_shuffle_v2i64_v8i64__9_6() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__9_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_store_dword off, v0, s32 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: v_writelane_b32 v0, s30, 0 -; GFX940-NEXT: v_writelane_b32 v0, s31, 1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s18 -; GFX940-NEXT: s_mov_b32 s9, s19 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s31, v0, 1 -; GFX940-NEXT: v_readlane_b32 s30, v0, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__9_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s31, v0, 1 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -21155,22 +21155,22 @@ define void @s_shuffle_v2i64_v8i64__10_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__10_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s20 -; GFX940-NEXT: s_mov_b32 s11, s21 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__10_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s20 +; GFX942-NEXT: s_mov_b32 s11, s21 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -21293,34 +21293,34 @@ define void @s_shuffle_v2i64_v8i64__11_6() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__11_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_store_dword off, v0, s32 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: v_writelane_b32 v0, s30, 0 -; GFX940-NEXT: v_writelane_b32 v0, s31, 1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s22 -; GFX940-NEXT: s_mov_b32 s9, s23 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s31, v0, 1 -; GFX940-NEXT: v_readlane_b32 s30, v0, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__11_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s22 +; GFX942-NEXT: s_mov_b32 s9, s23 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s31, v0, 1 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -21443,22 +21443,22 @@ define void @s_shuffle_v2i64_v8i64__12_6() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__12_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s16 -; GFX940-NEXT: s_mov_b32 s11, s17 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__12_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s16 +; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -21581,34 +21581,34 @@ define void @s_shuffle_v2i64_v8i64__13_6() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__13_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_store_dword off, v0, s32 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: v_writelane_b32 v0, s30, 0 -; GFX940-NEXT: v_writelane_b32 v0, s31, 1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s26 -; GFX940-NEXT: s_mov_b32 s9, s27 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s31, v0, 1 -; GFX940-NEXT: v_readlane_b32 s30, v0, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__13_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s26 +; GFX942-NEXT: s_mov_b32 s9, s27 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s31, v0, 1 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -21731,34 +21731,34 @@ define void @s_shuffle_v2i64_v8i64__14_6() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__14_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_store_dword off, v0, s32 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: v_writelane_b32 v0, s30, 0 -; GFX940-NEXT: v_writelane_b32 v0, s31, 1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s30, s12 -; GFX940-NEXT: s_mov_b32 s31, s13 -; GFX940-NEXT: s_mov_b64 s[8:9], s[28:29] -; GFX940-NEXT: s_mov_b64 s[10:11], s[30:31] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s31, v0, 1 -; GFX940-NEXT: v_readlane_b32 s30, v0, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__14_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s30, s12 +; GFX942-NEXT: s_mov_b32 s31, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[28:29] +; GFX942-NEXT: s_mov_b64 s[10:11], s[30:31] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s31, v0, 1 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -21793,18 +21793,18 @@ define void @s_shuffle_v2i64_v8i64__u_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -21861,20 +21861,20 @@ define void @s_shuffle_v2i64_v8i64__1_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -21931,20 +21931,20 @@ define void @s_shuffle_v2i64_v8i64__3_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -21982,18 +21982,18 @@ define void @s_shuffle_v2i64_v8i64__4_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -22031,20 +22031,20 @@ define void @s_shuffle_v2i64_v8i64__5_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -22078,18 +22078,18 @@ define void @s_shuffle_v2i64_v8i64__6_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -22127,20 +22127,20 @@ define void @s_shuffle_v2i64_v8i64__7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -22174,18 +22174,18 @@ define void @s_shuffle_v2i64_v8i64__8_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__8_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__8_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -22307,34 +22307,34 @@ define void @s_shuffle_v2i64_v8i64__9_7() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__9_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_store_dword off, v0, s32 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: v_writelane_b32 v0, s30, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_writelane_b32 v0, s31, 1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s18 -; GFX940-NEXT: s_mov_b32 s13, s19 -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s31, v0, 1 -; GFX940-NEXT: v_readlane_b32 s30, v0, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__9_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s18 +; GFX942-NEXT: s_mov_b32 s13, s19 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s31, v0, 1 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -22375,22 +22375,22 @@ define void @s_shuffle_v2i64_v8i64__10_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__10_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s22 -; GFX940-NEXT: s_mov_b32 s11, s23 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__10_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s22 +; GFX942-NEXT: s_mov_b32 s11, s23 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -22513,34 +22513,34 @@ define void @s_shuffle_v2i64_v8i64__11_7() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__11_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_store_dword off, v0, s32 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: v_writelane_b32 v0, s30, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_writelane_b32 v0, s31, 1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s22 -; GFX940-NEXT: s_mov_b32 s13, s23 -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s31, v0, 1 -; GFX940-NEXT: v_readlane_b32 s30, v0, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__11_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s22 +; GFX942-NEXT: s_mov_b32 s13, s23 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s31, v0, 1 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -22663,22 +22663,22 @@ define void @s_shuffle_v2i64_v8i64__12_7() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__12_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s18 -; GFX940-NEXT: s_mov_b32 s11, s19 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__12_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s18 +; GFX942-NEXT: s_mov_b32 s11, s19 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -22801,34 +22801,34 @@ define void @s_shuffle_v2i64_v8i64__13_7() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__13_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_store_dword off, v0, s32 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: v_writelane_b32 v0, s30, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_writelane_b32 v0, s31, 1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s26 -; GFX940-NEXT: s_mov_b32 s13, s27 -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s31, v0, 1 -; GFX940-NEXT: v_readlane_b32 s30, v0, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__13_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s26 +; GFX942-NEXT: s_mov_b32 s13, s27 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s31, v0, 1 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -22951,34 +22951,34 @@ define void @s_shuffle_v2i64_v8i64__14_7() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__14_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_store_dword off, v0, s32 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: v_writelane_b32 v0, s30, 0 -; GFX940-NEXT: v_writelane_b32 v0, s31, 1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s30, s14 -; GFX940-NEXT: s_mov_b32 s31, s15 -; GFX940-NEXT: s_mov_b64 s[8:9], s[28:29] -; GFX940-NEXT: s_mov_b64 s[10:11], s[30:31] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s31, v0, 1 -; GFX940-NEXT: v_readlane_b32 s30, v0, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__14_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s30, s14 +; GFX942-NEXT: s_mov_b32 s31, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[28:29] +; GFX942-NEXT: s_mov_b64 s[10:11], s[30:31] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s31, v0, 1 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -23023,17 +23023,17 @@ define void @s_shuffle_v2i64_v8i64__0_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__0_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__0_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -23067,18 +23067,18 @@ define void @s_shuffle_v2i64_v8i64__1_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__1_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__1_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -23108,17 +23108,17 @@ define void @s_shuffle_v2i64_v8i64__2_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__2_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__2_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -23152,18 +23152,18 @@ define void @s_shuffle_v2i64_v8i64__3_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__3_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__3_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -23197,17 +23197,17 @@ define void @s_shuffle_v2i64_v8i64__4_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__4_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__4_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -23241,18 +23241,18 @@ define void @s_shuffle_v2i64_v8i64__5_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__5_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__5_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -23286,18 +23286,18 @@ define void @s_shuffle_v2i64_v8i64__6_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__6_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__6_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -23331,18 +23331,18 @@ define void @s_shuffle_v2i64_v8i64__7_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__7_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__7_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -23394,20 +23394,20 @@ define void @s_shuffle_v2i64_v8i64__9_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__9_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__9_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -23466,20 +23466,20 @@ define void @s_shuffle_v2i64_v8i64__11_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__11_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__11_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -23518,18 +23518,18 @@ define void @s_shuffle_v2i64_v8i64__12_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__12_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__12_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -23568,20 +23568,20 @@ define void @s_shuffle_v2i64_v8i64__13_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__13_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__13_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -23620,20 +23620,20 @@ define void @s_shuffle_v2i64_v8i64__14_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__14_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__14_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -23664,17 +23664,17 @@ define void @s_shuffle_v2i64_v8i64__u_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__u_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__u_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -23715,22 +23715,22 @@ define void @s_shuffle_v2i64_v8i64__0_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__0_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__0_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -23771,22 +23771,22 @@ define void @s_shuffle_v2i64_v8i64__1_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__1_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__1_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -23827,22 +23827,22 @@ define void @s_shuffle_v2i64_v8i64__2_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__2_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__2_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -23961,22 +23961,22 @@ define void @s_shuffle_v2i64_v8i64__3_9() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__3_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__3_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -24043,22 +24043,22 @@ define void @s_shuffle_v2i64_v8i64__4_9() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__4_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__4_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -24121,33 +24121,33 @@ define void @s_shuffle_v2i64_v8i64__5_9() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__5_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_store_dword off, v0, s32 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: v_writelane_b32 v0, s30, 0 -; GFX940-NEXT: v_writelane_b32 v0, s31, 1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s26 -; GFX940-NEXT: s_mov_b32 s9, s27 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s31, v0, 1 -; GFX940-NEXT: v_readlane_b32 s30, v0, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__5_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s26 +; GFX942-NEXT: s_mov_b32 s9, s27 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s31, v0, 1 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -24270,34 +24270,34 @@ define void @s_shuffle_v2i64_v8i64__6_9() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__6_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_store_dword off, v0, s32 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: v_writelane_b32 v0, s30, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_writelane_b32 v0, s31, 1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s18 -; GFX940-NEXT: s_mov_b32 s15, s19 -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s31, v0, 1 -; GFX940-NEXT: v_readlane_b32 s30, v0, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__6_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s18 +; GFX942-NEXT: s_mov_b32 s15, s19 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s31, v0, 1 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -24338,22 +24338,22 @@ define void @s_shuffle_v2i64_v8i64__7_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__7_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s26 -; GFX940-NEXT: s_mov_b32 s9, s27 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__7_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s26 +; GFX942-NEXT: s_mov_b32 s9, s27 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -24384,17 +24384,17 @@ define void @s_shuffle_v2i64_v8i64__8_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__8_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__8_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -24493,18 +24493,18 @@ define void @s_shuffle_v2i64_v8i64__12_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__12_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__12_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -24563,20 +24563,20 @@ define void @s_shuffle_v2i64_v8i64__14_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__14_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__14_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -24611,18 +24611,18 @@ define void @s_shuffle_v2i64_v8i64__u_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__u_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__u_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -24663,22 +24663,22 @@ define void @s_shuffle_v2i64_v8i64__0_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__0_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s16 -; GFX940-NEXT: s_mov_b32 s11, s17 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__0_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s16 +; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -24723,24 +24723,24 @@ define void @s_shuffle_v2i64_v8i64__1_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__1_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__1_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -24781,22 +24781,22 @@ define void @s_shuffle_v2i64_v8i64__2_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__2_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s16 -; GFX940-NEXT: s_mov_b32 s11, s17 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__2_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s16 +; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -24841,24 +24841,24 @@ define void @s_shuffle_v2i64_v8i64__3_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__3_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__3_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -24925,22 +24925,22 @@ define void @s_shuffle_v2i64_v8i64__4_10() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__4_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s16 -; GFX940-NEXT: s_mov_b32 s11, s17 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__4_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s16 +; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -25007,23 +25007,23 @@ define void @s_shuffle_v2i64_v8i64__5_10() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__5_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s16 -; GFX940-NEXT: s_mov_b32 s11, s17 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__5_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s16 +; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -25146,34 +25146,34 @@ define void @s_shuffle_v2i64_v8i64__6_10() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__6_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_store_dword off, v0, s32 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: v_writelane_b32 v0, s30, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_writelane_b32 v0, s31, 1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s20 -; GFX940-NEXT: s_mov_b32 s15, s21 -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s31, v0, 1 -; GFX940-NEXT: v_readlane_b32 s30, v0, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__6_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s20 +; GFX942-NEXT: s_mov_b32 s15, s21 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s31, v0, 1 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -25296,34 +25296,34 @@ define void @s_shuffle_v2i64_v8i64__7_10() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__7_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_store_dword off, v0, s32 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: v_writelane_b32 v0, s30, 0 -; GFX940-NEXT: v_writelane_b32 v0, s31, 1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s20 -; GFX940-NEXT: s_mov_b32 s11, s21 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s31, v0, 1 -; GFX940-NEXT: v_readlane_b32 s30, v0, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__7_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s20 +; GFX942-NEXT: s_mov_b32 s11, s21 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s31, v0, 1 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -25382,20 +25382,20 @@ define void @s_shuffle_v2i64_v8i64__9_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__9_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__9_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -25454,20 +25454,20 @@ define void @s_shuffle_v2i64_v8i64__11_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__11_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__11_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -25506,18 +25506,18 @@ define void @s_shuffle_v2i64_v8i64__12_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__12_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__12_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -25556,20 +25556,20 @@ define void @s_shuffle_v2i64_v8i64__13_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__13_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__13_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -25608,20 +25608,20 @@ define void @s_shuffle_v2i64_v8i64__14_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__14_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__14_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -25652,17 +25652,17 @@ define void @s_shuffle_v2i64_v8i64__u_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__u_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__u_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -25703,22 +25703,22 @@ define void @s_shuffle_v2i64_v8i64__0_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__0_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s18 -; GFX940-NEXT: s_mov_b32 s11, s19 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__0_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s18 +; GFX942-NEXT: s_mov_b32 s11, s19 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -25837,22 +25837,22 @@ define void @s_shuffle_v2i64_v8i64__1_11() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__1_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__1_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -25893,22 +25893,22 @@ define void @s_shuffle_v2i64_v8i64__2_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__2_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s18 -; GFX940-NEXT: s_mov_b32 s11, s19 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__2_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s18 +; GFX942-NEXT: s_mov_b32 s11, s19 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -25971,33 +25971,33 @@ define void @s_shuffle_v2i64_v8i64__3_11() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__3_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_store_dword off, v0, s32 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: v_writelane_b32 v0, s30, 0 -; GFX940-NEXT: v_writelane_b32 v0, s31, 1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s22 -; GFX940-NEXT: s_mov_b32 s9, s23 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s31, v0, 1 -; GFX940-NEXT: v_readlane_b32 s30, v0, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__3_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s22 +; GFX942-NEXT: s_mov_b32 s9, s23 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s31, v0, 1 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -26064,22 +26064,22 @@ define void @s_shuffle_v2i64_v8i64__4_11() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__4_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s18 -; GFX940-NEXT: s_mov_b32 s11, s19 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__4_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s18 +; GFX942-NEXT: s_mov_b32 s11, s19 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -26120,22 +26120,22 @@ define void @s_shuffle_v2i64_v8i64__5_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__5_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s22 -; GFX940-NEXT: s_mov_b32 s9, s23 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__5_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s22 +; GFX942-NEXT: s_mov_b32 s9, s23 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -26258,34 +26258,34 @@ define void @s_shuffle_v2i64_v8i64__6_11() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__6_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_store_dword off, v0, s32 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: v_writelane_b32 v0, s30, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_writelane_b32 v0, s31, 1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s22 -; GFX940-NEXT: s_mov_b32 s15, s23 -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s31, v0, 1 -; GFX940-NEXT: v_readlane_b32 s30, v0, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__6_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s22 +; GFX942-NEXT: s_mov_b32 s15, s23 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s31, v0, 1 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -26326,22 +26326,22 @@ define void @s_shuffle_v2i64_v8i64__7_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__7_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s22 -; GFX940-NEXT: s_mov_b32 s9, s23 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__7_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s22 +; GFX942-NEXT: s_mov_b32 s9, s23 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -26412,17 +26412,17 @@ define void @s_shuffle_v2i64_v8i64__10_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__10_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__10_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -26481,18 +26481,18 @@ define void @s_shuffle_v2i64_v8i64__12_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__12_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__12_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -26551,20 +26551,20 @@ define void @s_shuffle_v2i64_v8i64__14_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__14_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s6 -; GFX940-NEXT: s_mov_b32 s15, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__14_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s6 +; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -26599,18 +26599,18 @@ define void @s_shuffle_v2i64_v8i64__u_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__u_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s8 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__u_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s8 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -26651,22 +26651,22 @@ define void @s_shuffle_v2i64_v8i64__0_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__0_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s20 -; GFX940-NEXT: s_mov_b32 s11, s21 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__0_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s20 +; GFX942-NEXT: s_mov_b32 s11, s21 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -26711,24 +26711,24 @@ define void @s_shuffle_v2i64_v8i64__1_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__1_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__1_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -26769,22 +26769,22 @@ define void @s_shuffle_v2i64_v8i64__2_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__2_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s20 -; GFX940-NEXT: s_mov_b32 s11, s21 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__2_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s20 +; GFX942-NEXT: s_mov_b32 s11, s21 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -26829,24 +26829,24 @@ define void @s_shuffle_v2i64_v8i64__3_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__3_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s16 -; GFX940-NEXT: s_mov_b32 s11, s17 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__3_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s16 +; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -26913,22 +26913,22 @@ define void @s_shuffle_v2i64_v8i64__4_12() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__4_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s20 -; GFX940-NEXT: s_mov_b32 s11, s21 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__4_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s20 +; GFX942-NEXT: s_mov_b32 s11, s21 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -26995,23 +26995,23 @@ define void @s_shuffle_v2i64_v8i64__5_12() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__5_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s20 -; GFX940-NEXT: s_mov_b32 s11, s21 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__5_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s20 +; GFX942-NEXT: s_mov_b32 s11, s21 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -27134,34 +27134,34 @@ define void @s_shuffle_v2i64_v8i64__6_12() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__6_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_store_dword off, v0, s32 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: v_writelane_b32 v0, s30, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_writelane_b32 v0, s31, 1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s24 -; GFX940-NEXT: s_mov_b32 s15, s25 -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s31, v0, 1 -; GFX940-NEXT: v_readlane_b32 s30, v0, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__6_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s24 +; GFX942-NEXT: s_mov_b32 s15, s25 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s31, v0, 1 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -27284,34 +27284,34 @@ define void @s_shuffle_v2i64_v8i64__7_12() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__7_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_store_dword off, v0, s32 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: v_writelane_b32 v0, s30, 0 -; GFX940-NEXT: v_writelane_b32 v0, s31, 1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s24 -; GFX940-NEXT: s_mov_b32 s11, s25 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s31, v0, 1 -; GFX940-NEXT: v_readlane_b32 s30, v0, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__7_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s24 +; GFX942-NEXT: s_mov_b32 s11, s25 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s31, v0, 1 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -27434,18 +27434,18 @@ define void @s_shuffle_v2i64_v8i64__12_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__12_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s8 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__12_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s8 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -27506,20 +27506,20 @@ define void @s_shuffle_v2i64_v8i64__14_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__14_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s8 -; GFX940-NEXT: s_mov_b32 s15, s9 -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__14_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s8 +; GFX942-NEXT: s_mov_b32 s15, s9 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -27554,17 +27554,17 @@ define void @s_shuffle_v2i64_v8i64__u_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__u_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__u_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -27605,22 +27605,22 @@ define void @s_shuffle_v2i64_v8i64__0_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__0_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s22 -; GFX940-NEXT: s_mov_b32 s11, s23 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__0_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s22 +; GFX942-NEXT: s_mov_b32 s11, s23 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -27665,32 +27665,32 @@ define void @s_shuffle_v2i64_v8i64__1_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__1_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_store_dword off, v0, s32 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: v_writelane_b32 v0, s30, 0 -; GFX940-NEXT: v_writelane_b32 v0, s31, 1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s18 -; GFX940-NEXT: s_mov_b32 s9, s19 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s31, v0, 1 -; GFX940-NEXT: v_readlane_b32 s30, v0, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__1_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s31, v0, 1 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -27731,22 +27731,22 @@ define void @s_shuffle_v2i64_v8i64__2_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__2_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s22 -; GFX940-NEXT: s_mov_b32 s11, s23 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__2_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s22 +; GFX942-NEXT: s_mov_b32 s11, s23 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -27791,22 +27791,22 @@ define void @s_shuffle_v2i64_v8i64__3_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__3_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s18 -; GFX940-NEXT: s_mov_b32 s9, s19 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__3_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -27873,22 +27873,22 @@ define void @s_shuffle_v2i64_v8i64__4_13() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__4_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s22 -; GFX940-NEXT: s_mov_b32 s11, s23 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__4_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s22 +; GFX942-NEXT: s_mov_b32 s11, s23 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -27955,22 +27955,22 @@ define void @s_shuffle_v2i64_v8i64__5_13() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__5_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s18 -; GFX940-NEXT: s_mov_b32 s9, s19 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__5_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -28093,34 +28093,34 @@ define void @s_shuffle_v2i64_v8i64__6_13() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__6_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_store_dword off, v0, s32 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: v_writelane_b32 v0, s30, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_writelane_b32 v0, s31, 1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s26 -; GFX940-NEXT: s_mov_b32 s15, s27 -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s31, v0, 1 -; GFX940-NEXT: v_readlane_b32 s30, v0, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__6_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s26 +; GFX942-NEXT: s_mov_b32 s15, s27 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s31, v0, 1 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -28243,22 +28243,22 @@ define void @s_shuffle_v2i64_v8i64__7_13() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__7_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s18 -; GFX940-NEXT: s_mov_b32 s9, s19 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__7_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -28317,18 +28317,18 @@ define void @s_shuffle_v2i64_v8i64__9_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__9_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__9_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -28387,18 +28387,18 @@ define void @s_shuffle_v2i64_v8i64__11_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__11_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__11_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -28433,17 +28433,17 @@ define void @s_shuffle_v2i64_v8i64__12_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__12_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__12_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -28482,18 +28482,18 @@ define void @s_shuffle_v2i64_v8i64__13_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__13_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__13_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -28532,20 +28532,20 @@ define void @s_shuffle_v2i64_v8i64__14_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__14_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s10 -; GFX940-NEXT: s_mov_b32 s15, s11 -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__14_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s10 +; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -28580,18 +28580,18 @@ define void @s_shuffle_v2i64_v8i64__u_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__u_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__u_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -28632,22 +28632,22 @@ define void @s_shuffle_v2i64_v8i64__0_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__0_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s24 -; GFX940-NEXT: s_mov_b32 s11, s25 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__0_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s24 +; GFX942-NEXT: s_mov_b32 s11, s25 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -28692,24 +28692,24 @@ define void @s_shuffle_v2i64_v8i64__1_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__1_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s16 -; GFX940-NEXT: s_mov_b32 s11, s17 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__1_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s16 +; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -28750,22 +28750,22 @@ define void @s_shuffle_v2i64_v8i64__2_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__2_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s24 -; GFX940-NEXT: s_mov_b32 s11, s25 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__2_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s24 +; GFX942-NEXT: s_mov_b32 s11, s25 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -28810,24 +28810,24 @@ define void @s_shuffle_v2i64_v8i64__3_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__3_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s20 -; GFX940-NEXT: s_mov_b32 s11, s21 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__3_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s20 +; GFX942-NEXT: s_mov_b32 s11, s21 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -28894,22 +28894,22 @@ define void @s_shuffle_v2i64_v8i64__4_14() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__4_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s24 -; GFX940-NEXT: s_mov_b32 s11, s25 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__4_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s24 +; GFX942-NEXT: s_mov_b32 s11, s25 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -28976,23 +28976,23 @@ define void @s_shuffle_v2i64_v8i64__5_14() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__5_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s24 -; GFX940-NEXT: s_mov_b32 s11, s25 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__5_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s24 +; GFX942-NEXT: s_mov_b32 s11, s25 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -29115,34 +29115,34 @@ define void @s_shuffle_v2i64_v8i64__6_14() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__6_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_store_dword off, v0, s32 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: v_writelane_b32 v0, s30, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_writelane_b32 v0, s31, 1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s28 -; GFX940-NEXT: s_mov_b32 s15, s29 -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s31, v0, 1 -; GFX940-NEXT: v_readlane_b32 s30, v0, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__6_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s28 +; GFX942-NEXT: s_mov_b32 s15, s29 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s31, v0, 1 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -29265,34 +29265,34 @@ define void @s_shuffle_v2i64_v8i64__7_14() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__7_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_store_dword off, v0, s32 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: v_writelane_b32 v0, s30, 0 -; GFX940-NEXT: v_writelane_b32 v0, s31, 1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s28 -; GFX940-NEXT: s_mov_b32 s11, s29 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s31, v0, 1 -; GFX940-NEXT: v_readlane_b32 s30, v0, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__7_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s28 +; GFX942-NEXT: s_mov_b32 s11, s29 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s31, v0, 1 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -29351,20 +29351,20 @@ define void @s_shuffle_v2i64_v8i64__9_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__9_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__9_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -29423,20 +29423,20 @@ define void @s_shuffle_v2i64_v8i64__11_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__11_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__11_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -29475,18 +29475,18 @@ define void @s_shuffle_v2i64_v8i64__12_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__12_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__12_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -29525,20 +29525,20 @@ define void @s_shuffle_v2i64_v8i64__13_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__13_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__13_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -29577,20 +29577,20 @@ define void @s_shuffle_v2i64_v8i64__14_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__14_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__14_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -29625,18 +29625,18 @@ define void @s_shuffle_v2i64_v8i64__u_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__u_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__u_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -29677,22 +29677,22 @@ define void @s_shuffle_v2i64_v8i64__0_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__0_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s26 -; GFX940-NEXT: s_mov_b32 s11, s27 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__0_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s26 +; GFX942-NEXT: s_mov_b32 s11, s27 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -29737,24 +29737,24 @@ define void @s_shuffle_v2i64_v8i64__1_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__1_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s16, s2 -; GFX940-NEXT: s_mov_b32 s17, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[16:17] -; GFX940-NEXT: s_mov_b64 s[10:11], s[18:19] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__1_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s16, s2 +; GFX942-NEXT: s_mov_b32 s17, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17] +; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -29795,22 +29795,22 @@ define void @s_shuffle_v2i64_v8i64__2_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__2_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s26 -; GFX940-NEXT: s_mov_b32 s11, s27 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__2_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s26 +; GFX942-NEXT: s_mov_b32 s11, s27 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -29855,24 +29855,24 @@ define void @s_shuffle_v2i64_v8i64__3_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__3_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s20, s6 -; GFX940-NEXT: s_mov_b32 s21, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[20:21] -; GFX940-NEXT: s_mov_b64 s[10:11], s[22:23] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__3_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s20, s6 +; GFX942-NEXT: s_mov_b32 s21, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[20:21] +; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -29939,22 +29939,22 @@ define void @s_shuffle_v2i64_v8i64__4_15() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__4_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s26 -; GFX940-NEXT: s_mov_b32 s11, s27 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__4_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s26 +; GFX942-NEXT: s_mov_b32 s11, s27 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -30021,24 +30021,24 @@ define void @s_shuffle_v2i64_v8i64__5_15() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__5_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:27] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s24, s10 -; GFX940-NEXT: s_mov_b32 s25, s11 -; GFX940-NEXT: s_mov_b64 s[8:9], s[24:25] -; GFX940-NEXT: s_mov_b64 s[10:11], s[26:27] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__5_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s24, s10 +; GFX942-NEXT: s_mov_b32 s25, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[24:25] +; GFX942-NEXT: s_mov_b64 s[10:11], s[26:27] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -30161,34 +30161,34 @@ define void @s_shuffle_v2i64_v8i64__6_15() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__6_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_store_dword off, v0, s32 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: v_writelane_b32 v0, s30, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_writelane_b32 v0, s31, 1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s30 -; GFX940-NEXT: s_mov_b32 s15, s31 -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s31, v0, 1 -; GFX940-NEXT: v_readlane_b32 s30, v0, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__6_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s30 +; GFX942-NEXT: s_mov_b32 s15, s31 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s31, v0, 1 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -30311,34 +30311,34 @@ define void @s_shuffle_v2i64_v8i64__7_15() { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__7_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_store_dword off, v0, s32 sc0 sc1 ; 4-byte Folded Spill -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: v_writelane_b32 v0, s30, 0 -; GFX940-NEXT: v_writelane_b32 v0, s31, 1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:31] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s28, s14 -; GFX940-NEXT: s_mov_b32 s29, s15 -; GFX940-NEXT: s_mov_b64 s[8:9], s[28:29] -; GFX940-NEXT: s_mov_b64 s[10:11], s[30:31] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_readlane_b32 s31, v0, 1 -; GFX940-NEXT: v_readlane_b32 s30, v0, 0 -; GFX940-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX940-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload -; GFX940-NEXT: s_mov_b64 exec, s[0:1] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__7_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s28, s14 +; GFX942-NEXT: s_mov_b32 s29, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[28:29] +; GFX942-NEXT: s_mov_b64 s[10:11], s[30:31] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s31, v0, 1 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -30397,20 +30397,20 @@ define void @s_shuffle_v2i64_v8i64__9_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__9_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__9_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -30469,20 +30469,20 @@ define void @s_shuffle_v2i64_v8i64__11_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__11_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__11_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -30521,18 +30521,18 @@ define void @s_shuffle_v2i64_v8i64__12_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__12_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__12_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -30571,20 +30571,20 @@ define void @s_shuffle_v2i64_v8i64__13_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__13_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__13_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -30619,18 +30619,18 @@ define void @s_shuffle_v2i64_v8i64__14_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2i64_v8i64__14_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX940-NEXT: s_mov_b64 s[10:11], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2i64_v8i64__14_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v2p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v2p0.ll index 2f13989ac01a5..7f8f2dbbb09a1 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v2p0.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v2p0.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v2p0_v2p0__u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v2p0_v2p0__0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v2p0__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v2p0__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -81,18 +81,18 @@ define void @v_shuffle_v2p0_v2p0__1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v2p0__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v2p0__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -137,18 +137,18 @@ define void @v_shuffle_v2p0_v2p0__3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v2p0__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v2p0__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <2 x i32> @@ -193,24 +193,24 @@ define void @v_shuffle_v2p0_v2p0__3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v2p0__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v2p0__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <2 x i32> @@ -251,22 +251,22 @@ define void @v_shuffle_v2p0_v2p0__3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v2p0__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v2p0__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <2 x i32> @@ -301,19 +301,19 @@ define void @v_shuffle_v2p0_v2p0__3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v2p0__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v2p0__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <2 x i32> @@ -348,18 +348,18 @@ define void @v_shuffle_v2p0_v2p0__3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v2p0__3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v2p0__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <2 x i32> @@ -394,18 +394,18 @@ define void @v_shuffle_v2p0_v2p0__u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v2p0__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v2p0__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -439,18 +439,18 @@ define void @v_shuffle_v2p0_v2p0__0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v2p0__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v2p0__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <2 x i32> zeroinitializer store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -484,19 +484,19 @@ define void @v_shuffle_v2p0_v2p0__1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v2p0__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v2p0__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -530,18 +530,18 @@ define void @v_shuffle_v2p0_v2p0__2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v2p0__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v2p0__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -571,16 +571,16 @@ define void @v_shuffle_v2p0_v2p0__u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v2p0__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v2p0__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -610,16 +610,16 @@ define void @v_shuffle_v2p0_v2p0__0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v2p0__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v2p0__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -653,18 +653,18 @@ define void @v_shuffle_v2p0_v2p0__1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v2p0__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v2p0__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -694,16 +694,16 @@ define void @v_shuffle_v2p0_v2p0__2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v2p0__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v2p0__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -744,16 +744,16 @@ define void @v_shuffle_v2p0_v2p0__0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v2p0__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v2p0__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -787,18 +787,18 @@ define void @v_shuffle_v2p0_v2p0__1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v2p0__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v2p0__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -839,16 +839,16 @@ define void @v_shuffle_v2p0_v2p0__u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v2p0__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v2p0__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <2 x i32> @@ -889,22 +889,22 @@ define void @v_shuffle_v2p0_v2p0__0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v2p0__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v2p0__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <2 x i32> @@ -945,22 +945,22 @@ define void @v_shuffle_v2p0_v2p0__1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v2p0__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v2p0__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <2 x i32> @@ -991,16 +991,16 @@ define void @v_shuffle_v2p0_v2p0__2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v2p0__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v2p0__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <2 x i32> @@ -1045,17 +1045,17 @@ define void @s_shuffle_v2p0_v2p0__0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v2p0__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v2p0__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -1089,18 +1089,18 @@ define void @s_shuffle_v2p0_v2p0__1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v2p0__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v2p0__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -1148,18 +1148,18 @@ define void @s_shuffle_v2p0_v2p0__3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v2p0__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v2p0__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <2 x i32> @@ -1204,23 +1204,23 @@ define void @s_shuffle_v2p0_v2p0__3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v2p0__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v2p0__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <2 x i32> @@ -1261,21 +1261,21 @@ define void @s_shuffle_v2p0_v2p0__3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v2p0__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v2p0__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <2 x i32> @@ -1314,20 +1314,20 @@ define void @s_shuffle_v2p0_v2p0__3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v2p0__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v2p0__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <2 x i32> @@ -1382,18 +1382,18 @@ define void @s_shuffle_v2p0_v2p0__u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v2p0__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v2p0__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -1450,20 +1450,20 @@ define void @s_shuffle_v2p0_v2p0__1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v2p0__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v2p0__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -1497,18 +1497,18 @@ define void @s_shuffle_v2p0_v2p0__2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v2p0__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v2p0__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -1538,17 +1538,17 @@ define void @s_shuffle_v2p0_v2p0__u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v2p0__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v2p0__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -1578,17 +1578,17 @@ define void @s_shuffle_v2p0_v2p0__0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v2p0__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v2p0__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -1637,17 +1637,17 @@ define void @s_shuffle_v2p0_v2p0__2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v2p0__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v2p0__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -1691,17 +1691,17 @@ define void @s_shuffle_v2p0_v2p0__0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v2p0__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v2p0__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -1735,18 +1735,18 @@ define void @s_shuffle_v2p0_v2p0__1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v2p0__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v2p0__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -1790,17 +1790,17 @@ define void @s_shuffle_v2p0_v2p0__u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v2p0__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v2p0__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <2 x i32> @@ -1841,21 +1841,21 @@ define void @s_shuffle_v2p0_v2p0__0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v2p0__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v2p0__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <2 x i32> @@ -1896,21 +1896,21 @@ define void @s_shuffle_v2p0_v2p0__1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v2p0__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v2p0__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <2 x i32> @@ -1941,17 +1941,17 @@ define void @s_shuffle_v2p0_v2p0__2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v2p0__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v2p0__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <2 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v3p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v3p0.ll index 6c3403df9aaec..27a6cf11c4cb1 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v3p0.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v3p0.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v2p0_v3p0__u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v2p0_v3p0__0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -77,16 +77,16 @@ define void @v_shuffle_v2p0_v3p0__1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -120,18 +120,18 @@ define void @v_shuffle_v2p0_v3p0__2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -172,16 +172,16 @@ define void @v_shuffle_v2p0_v3p0__4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -216,18 +216,18 @@ define void @v_shuffle_v2p0_v3p0__5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -272,24 +272,24 @@ define void @v_shuffle_v2p0_v3p0__5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -330,22 +330,22 @@ define void @v_shuffle_v2p0_v3p0__5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -386,22 +386,22 @@ define void @v_shuffle_v2p0_v3p0__5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -440,20 +440,20 @@ define void @v_shuffle_v2p0_v3p0__5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -488,18 +488,18 @@ define void @v_shuffle_v2p0_v3p0__5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -534,18 +534,18 @@ define void @v_shuffle_v2p0_v3p0__5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -580,18 +580,18 @@ define void @v_shuffle_v2p0_v3p0__u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -625,18 +625,18 @@ define void @v_shuffle_v2p0_v3p0__0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> zeroinitializer store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -670,18 +670,18 @@ define void @v_shuffle_v2p0_v3p0__1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -719,20 +719,20 @@ define void @v_shuffle_v2p0_v3p0__2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -766,18 +766,18 @@ define void @v_shuffle_v2p0_v3p0__3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -817,22 +817,22 @@ define void @v_shuffle_v2p0_v3p0__4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -863,16 +863,16 @@ define void @v_shuffle_v2p0_v3p0__u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -902,16 +902,16 @@ define void @v_shuffle_v2p0_v3p0__0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -945,18 +945,18 @@ define void @v_shuffle_v2p0_v3p0__1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -990,18 +990,18 @@ define void @v_shuffle_v2p0_v3p0__2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1031,16 +1031,16 @@ define void @v_shuffle_v2p0_v3p0__3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1080,22 +1080,22 @@ define void @v_shuffle_v2p0_v3p0__4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v2 -; GFX940-NEXT: v_mov_b32_e32 v9, v3 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v2 +; GFX942-NEXT: v_mov_b32_e32 v9, v3 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -1126,16 +1126,16 @@ define void @v_shuffle_v2p0_v3p0__u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1169,18 +1169,18 @@ define void @v_shuffle_v2p0_v3p0__0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1210,16 +1210,16 @@ define void @v_shuffle_v2p0_v3p0__1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1253,18 +1253,18 @@ define void @v_shuffle_v2p0_v3p0__2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1294,16 +1294,16 @@ define void @v_shuffle_v2p0_v3p0__3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1343,22 +1343,22 @@ define void @v_shuffle_v2p0_v3p0__4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v4 -; GFX940-NEXT: v_mov_b32_e32 v11, v5 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v4 +; GFX942-NEXT: v_mov_b32_e32 v11, v5 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -1400,16 +1400,16 @@ define void @v_shuffle_v2p0_v3p0__0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1439,16 +1439,16 @@ define void @v_shuffle_v2p0_v3p0__1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1482,18 +1482,18 @@ define void @v_shuffle_v2p0_v3p0__2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1538,18 +1538,18 @@ define void @v_shuffle_v2p0_v3p0__4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -1580,16 +1580,16 @@ define void @v_shuffle_v2p0_v3p0__u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -1630,22 +1630,22 @@ define void @v_shuffle_v2p0_v3p0__0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -1686,22 +1686,22 @@ define void @v_shuffle_v2p0_v3p0__1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -1742,22 +1742,22 @@ define void @v_shuffle_v2p0_v3p0__2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -1788,16 +1788,16 @@ define void @v_shuffle_v2p0_v3p0__3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -1832,18 +1832,18 @@ define void @v_shuffle_v2p0_v3p0__4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -1874,16 +1874,16 @@ define void @v_shuffle_v2p0_v3p0__u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -1924,22 +1924,22 @@ define void @v_shuffle_v2p0_v3p0__0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -1980,22 +1980,22 @@ define void @v_shuffle_v2p0_v3p0__1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v8 -; GFX940-NEXT: v_mov_b32_e32 v5, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v8 +; GFX942-NEXT: v_mov_b32_e32 v5, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -2036,22 +2036,22 @@ define void @v_shuffle_v2p0_v3p0__2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v4 -; GFX940-NEXT: v_mov_b32_e32 v9, v5 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v4 +; GFX942-NEXT: v_mov_b32_e32 v9, v5 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -2086,18 +2086,18 @@ define void @v_shuffle_v2p0_v3p0__3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -2128,16 +2128,16 @@ define void @v_shuffle_v2p0_v3p0__4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v3p0__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v3p0__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -2182,17 +2182,17 @@ define void @s_shuffle_v2p0_v3p0__0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v3p0__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v3p0__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -2226,18 +2226,18 @@ define void @s_shuffle_v2p0_v3p0__1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v3p0__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v3p0__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -2267,18 +2267,18 @@ define void @s_shuffle_v2p0_v3p0__2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v3p0__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v3p0__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -2326,18 +2326,18 @@ define void @s_shuffle_v2p0_v3p0__4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v3p0__4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v3p0__4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -2368,18 +2368,18 @@ define void @s_shuffle_v2p0_v3p0__5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v3p0__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v3p0__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -2424,21 +2424,21 @@ define void @s_shuffle_v2p0_v3p0__5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v3p0__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v3p0__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -2475,21 +2475,21 @@ define void @s_shuffle_v2p0_v3p0__5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v3p0__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v3p0__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -2530,23 +2530,23 @@ define void @s_shuffle_v2p0_v3p0__5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v3p0__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v3p0__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -2581,20 +2581,20 @@ define void @s_shuffle_v2p0_v3p0__5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v3p0__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v3p0__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -2653,20 +2653,20 @@ define void @s_shuffle_v2p0_v3p0__5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v3p0__5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v3p0__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -2701,18 +2701,18 @@ define void @s_shuffle_v2p0_v3p0__u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v3p0__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v3p0__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -2769,20 +2769,20 @@ define void @s_shuffle_v2p0_v3p0__1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v3p0__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v3p0__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -2816,20 +2816,20 @@ define void @s_shuffle_v2p0_v3p0__2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v3p0__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v3p0__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -2863,18 +2863,18 @@ define void @s_shuffle_v2p0_v3p0__3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v3p0__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v3p0__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -2918,23 +2918,23 @@ define void @s_shuffle_v2p0_v3p0__4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v3p0__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v3p0__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -2965,17 +2965,17 @@ define void @s_shuffle_v2p0_v3p0__u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v3p0__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v3p0__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -3005,17 +3005,17 @@ define void @s_shuffle_v2p0_v3p0__0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v3p0__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v3p0__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -3083,17 +3083,17 @@ define void @s_shuffle_v2p0_v3p0__3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v3p0__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v3p0__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -3133,21 +3133,21 @@ define void @s_shuffle_v2p0_v3p0__4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v3p0__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v3p0__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -3182,18 +3182,18 @@ define void @s_shuffle_v2p0_v3p0__u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v3p0__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v3p0__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -3250,20 +3250,20 @@ define void @s_shuffle_v2p0_v3p0__1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v3p0__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v3p0__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -3301,20 +3301,20 @@ define void @s_shuffle_v2p0_v3p0__2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v3p0__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v3p0__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -3348,18 +3348,18 @@ define void @s_shuffle_v2p0_v3p0__3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v3p0__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v3p0__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -3403,23 +3403,23 @@ define void @s_shuffle_v2p0_v3p0__4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v3p0__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v3p0__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -3464,17 +3464,17 @@ define void @s_shuffle_v2p0_v3p0__0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v3p0__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v3p0__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -3508,18 +3508,18 @@ define void @s_shuffle_v2p0_v3p0__1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v3p0__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v3p0__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -3549,18 +3549,18 @@ define void @s_shuffle_v2p0_v3p0__2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v3p0__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v3p0__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -3612,20 +3612,20 @@ define void @s_shuffle_v2p0_v3p0__4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v3p0__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v3p0__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -3656,17 +3656,17 @@ define void @s_shuffle_v2p0_v3p0__u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v3p0__u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v3p0__u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -3707,21 +3707,21 @@ define void @s_shuffle_v2p0_v3p0__0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v3p0__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v3p0__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -3762,21 +3762,21 @@ define void @s_shuffle_v2p0_v3p0__1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v3p0__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v3p0__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -3817,21 +3817,21 @@ define void @s_shuffle_v2p0_v3p0__2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v3p0__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v3p0__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -3862,17 +3862,17 @@ define void @s_shuffle_v2p0_v3p0__3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v3p0__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v3p0__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -3927,18 +3927,18 @@ define void @s_shuffle_v2p0_v3p0__u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v3p0__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v3p0__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -3979,21 +3979,21 @@ define void @s_shuffle_v2p0_v3p0__0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v3p0__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v3p0__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -4038,23 +4038,23 @@ define void @s_shuffle_v2p0_v3p0__1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v3p0__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v3p0__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -4095,23 +4095,23 @@ define void @s_shuffle_v2p0_v3p0__2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v3p0__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v3p0__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -4170,20 +4170,20 @@ define void @s_shuffle_v2p0_v3p0__4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v3p0__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v3p0__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v4p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v4p0.ll index 0b0570a328201..ae31524ebaa7f 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v4p0.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v4p0.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v2p0_v4p0__u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v2p0_v4p0__0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -77,16 +77,16 @@ define void @v_shuffle_v2p0_v4p0__1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -116,16 +116,16 @@ define void @v_shuffle_v2p0_v4p0__2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -159,18 +159,18 @@ define void @v_shuffle_v2p0_v4p0__3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -211,16 +211,16 @@ define void @v_shuffle_v2p0_v4p0__5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -251,16 +251,16 @@ define void @v_shuffle_v2p0_v4p0__6_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -295,18 +295,18 @@ define void @v_shuffle_v2p0_v4p0__7_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -351,24 +351,24 @@ define void @v_shuffle_v2p0_v4p0__7_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -409,22 +409,22 @@ define void @v_shuffle_v2p0_v4p0__7_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -465,22 +465,22 @@ define void @v_shuffle_v2p0_v4p0__7_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -521,22 +521,22 @@ define void @v_shuffle_v2p0_v4p0__7_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -575,20 +575,20 @@ define void @v_shuffle_v2p0_v4p0__7_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -623,18 +623,18 @@ define void @v_shuffle_v2p0_v4p0__7_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -669,18 +669,18 @@ define void @v_shuffle_v2p0_v4p0__7_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -715,18 +715,18 @@ define void @v_shuffle_v2p0_v4p0__7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -761,18 +761,18 @@ define void @v_shuffle_v2p0_v4p0__u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -806,18 +806,18 @@ define void @v_shuffle_v2p0_v4p0__0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> zeroinitializer store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -851,18 +851,18 @@ define void @v_shuffle_v2p0_v4p0__1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -896,18 +896,18 @@ define void @v_shuffle_v2p0_v4p0__2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -945,20 +945,20 @@ define void @v_shuffle_v2p0_v4p0__3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -992,18 +992,18 @@ define void @v_shuffle_v2p0_v4p0__4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1043,22 +1043,22 @@ define void @v_shuffle_v2p0_v4p0__5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -1099,22 +1099,22 @@ define void @v_shuffle_v2p0_v4p0__6_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v0 -; GFX940-NEXT: v_mov_b32_e32 v9, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v0 +; GFX942-NEXT: v_mov_b32_e32 v9, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -1145,16 +1145,16 @@ define void @v_shuffle_v2p0_v4p0__u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1184,16 +1184,16 @@ define void @v_shuffle_v2p0_v4p0__0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1227,18 +1227,18 @@ define void @v_shuffle_v2p0_v4p0__1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1272,18 +1272,18 @@ define void @v_shuffle_v2p0_v4p0__2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1317,18 +1317,18 @@ define void @v_shuffle_v2p0_v4p0__3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1358,16 +1358,16 @@ define void @v_shuffle_v2p0_v4p0__4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1407,22 +1407,22 @@ define void @v_shuffle_v2p0_v4p0__5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v2 -; GFX940-NEXT: v_mov_b32_e32 v9, v3 -; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v2 +; GFX942-NEXT: v_mov_b32_e32 v9, v3 +; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -1463,22 +1463,22 @@ define void @v_shuffle_v2p0_v4p0__6_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v2 -; GFX940-NEXT: v_mov_b32_e32 v11, v3 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v2 +; GFX942-NEXT: v_mov_b32_e32 v11, v3 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -1509,16 +1509,16 @@ define void @v_shuffle_v2p0_v4p0__u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1552,18 +1552,18 @@ define void @v_shuffle_v2p0_v4p0__0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1593,16 +1593,16 @@ define void @v_shuffle_v2p0_v4p0__1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1636,18 +1636,18 @@ define void @v_shuffle_v2p0_v4p0__2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1681,18 +1681,18 @@ define void @v_shuffle_v2p0_v4p0__3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1722,16 +1722,16 @@ define void @v_shuffle_v2p0_v4p0__4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1771,22 +1771,22 @@ define void @v_shuffle_v2p0_v4p0__5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v4 -; GFX940-NEXT: v_mov_b32_e32 v11, v5 -; GFX940-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v4 +; GFX942-NEXT: v_mov_b32_e32 v11, v5 +; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -1827,22 +1827,22 @@ define void @v_shuffle_v2p0_v4p0__6_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v4 -; GFX940-NEXT: v_mov_b32_e32 v13, v5 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v4 +; GFX942-NEXT: v_mov_b32_e32 v13, v5 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -1873,16 +1873,16 @@ define void @v_shuffle_v2p0_v4p0__u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1916,18 +1916,18 @@ define void @v_shuffle_v2p0_v4p0__0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1961,18 +1961,18 @@ define void @v_shuffle_v2p0_v4p0__1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2002,16 +2002,16 @@ define void @v_shuffle_v2p0_v4p0__2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2045,18 +2045,18 @@ define void @v_shuffle_v2p0_v4p0__3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2086,16 +2086,16 @@ define void @v_shuffle_v2p0_v4p0__4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2135,22 +2135,22 @@ define void @v_shuffle_v2p0_v4p0__5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v6 -; GFX940-NEXT: v_mov_b32_e32 v13, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v6 +; GFX942-NEXT: v_mov_b32_e32 v13, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -2191,22 +2191,22 @@ define void @v_shuffle_v2p0_v4p0__6_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v14, v6 -; GFX940-NEXT: v_mov_b32_e32 v15, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v14, v6 +; GFX942-NEXT: v_mov_b32_e32 v15, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -2248,16 +2248,16 @@ define void @v_shuffle_v2p0_v4p0__0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2287,16 +2287,16 @@ define void @v_shuffle_v2p0_v4p0__1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2326,16 +2326,16 @@ define void @v_shuffle_v2p0_v4p0__2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2369,18 +2369,18 @@ define void @v_shuffle_v2p0_v4p0__3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> store <2 x ptr> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2425,18 +2425,18 @@ define void @v_shuffle_v2p0_v4p0__5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -2471,18 +2471,18 @@ define void @v_shuffle_v2p0_v4p0__6_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -2513,16 +2513,16 @@ define void @v_shuffle_v2p0_v4p0__u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -2563,22 +2563,22 @@ define void @v_shuffle_v2p0_v4p0__0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -2619,22 +2619,22 @@ define void @v_shuffle_v2p0_v4p0__1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -2675,22 +2675,22 @@ define void @v_shuffle_v2p0_v4p0__2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -2731,22 +2731,22 @@ define void @v_shuffle_v2p0_v4p0__3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v6 -; GFX940-NEXT: v_mov_b32_e32 v9, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v6 +; GFX942-NEXT: v_mov_b32_e32 v9, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -2777,16 +2777,16 @@ define void @v_shuffle_v2p0_v4p0__4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -2821,18 +2821,18 @@ define void @v_shuffle_v2p0_v4p0__5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -2867,18 +2867,18 @@ define void @v_shuffle_v2p0_v4p0__6_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -2909,16 +2909,16 @@ define void @v_shuffle_v2p0_v4p0__u_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -2959,22 +2959,22 @@ define void @v_shuffle_v2p0_v4p0__0_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -3015,22 +3015,22 @@ define void @v_shuffle_v2p0_v4p0__1_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v8 -; GFX940-NEXT: v_mov_b32_e32 v5, v9 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v8 +; GFX942-NEXT: v_mov_b32_e32 v5, v9 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -3071,22 +3071,22 @@ define void @v_shuffle_v2p0_v4p0__2_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v10 -; GFX940-NEXT: v_mov_b32_e32 v7, v11 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v10 +; GFX942-NEXT: v_mov_b32_e32 v7, v11 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -3127,22 +3127,22 @@ define void @v_shuffle_v2p0_v4p0__3_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v6 -; GFX940-NEXT: v_mov_b32_e32 v11, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v6 +; GFX942-NEXT: v_mov_b32_e32 v11, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -3177,18 +3177,18 @@ define void @v_shuffle_v2p0_v4p0__4_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -3219,16 +3219,16 @@ define void @v_shuffle_v2p0_v4p0__5_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -3263,18 +3263,18 @@ define void @v_shuffle_v2p0_v4p0__6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -3305,16 +3305,16 @@ define void @v_shuffle_v2p0_v4p0__u_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -3355,22 +3355,22 @@ define void @v_shuffle_v2p0_v4p0__0_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -3411,22 +3411,22 @@ define void @v_shuffle_v2p0_v4p0__1_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v10 -; GFX940-NEXT: v_mov_b32_e32 v5, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v10 +; GFX942-NEXT: v_mov_b32_e32 v5, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -3467,22 +3467,22 @@ define void @v_shuffle_v2p0_v4p0__2_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v12 -; GFX940-NEXT: v_mov_b32_e32 v7, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v12 +; GFX942-NEXT: v_mov_b32_e32 v7, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -3523,22 +3523,22 @@ define void @v_shuffle_v2p0_v4p0__3_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v6 -; GFX940-NEXT: v_mov_b32_e32 v13, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v6 +; GFX942-NEXT: v_mov_b32_e32 v13, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -3573,18 +3573,18 @@ define void @v_shuffle_v2p0_v4p0__4_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -3619,18 +3619,18 @@ define void @v_shuffle_v2p0_v4p0__5_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -3661,16 +3661,16 @@ define void @v_shuffle_v2p0_v4p0__6_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p0_v4p0__6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p0_v4p0__6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -3715,17 +3715,17 @@ define void @s_shuffle_v2p0_v4p0__0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -3759,18 +3759,18 @@ define void @s_shuffle_v2p0_v4p0__1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -3800,17 +3800,17 @@ define void @s_shuffle_v2p0_v4p0__2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -3844,18 +3844,18 @@ define void @s_shuffle_v2p0_v4p0__3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -3903,18 +3903,18 @@ define void @s_shuffle_v2p0_v4p0__5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -3945,17 +3945,17 @@ define void @s_shuffle_v2p0_v4p0__6_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -3990,18 +3990,18 @@ define void @s_shuffle_v2p0_v4p0__7_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -4046,24 +4046,24 @@ define void @s_shuffle_v2p0_v4p0__7_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -4104,21 +4104,21 @@ define void @s_shuffle_v2p0_v4p0__7_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -4163,23 +4163,23 @@ define void @s_shuffle_v2p0_v4p0__7_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -4220,22 +4220,22 @@ define void @s_shuffle_v2p0_v4p0__7_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -4274,20 +4274,20 @@ define void @s_shuffle_v2p0_v4p0__7_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -4346,20 +4346,20 @@ define void @s_shuffle_v2p0_v4p0__7_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -4414,18 +4414,18 @@ define void @s_shuffle_v2p0_v4p0__u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -4482,20 +4482,20 @@ define void @s_shuffle_v2p0_v4p0__1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -4552,20 +4552,20 @@ define void @s_shuffle_v2p0_v4p0__3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -4599,18 +4599,18 @@ define void @s_shuffle_v2p0_v4p0__4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -4654,24 +4654,24 @@ define void @s_shuffle_v2p0_v4p0__5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -4712,22 +4712,22 @@ define void @s_shuffle_v2p0_v4p0__6_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -4758,17 +4758,17 @@ define void @s_shuffle_v2p0_v4p0__u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -4798,17 +4798,17 @@ define void @s_shuffle_v2p0_v4p0__0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -4895,17 +4895,17 @@ define void @s_shuffle_v2p0_v4p0__4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -4945,21 +4945,21 @@ define void @s_shuffle_v2p0_v4p0__5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -5000,22 +5000,22 @@ define void @s_shuffle_v2p0_v4p0__6_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -5050,18 +5050,18 @@ define void @s_shuffle_v2p0_v4p0__u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -5118,20 +5118,20 @@ define void @s_shuffle_v2p0_v4p0__1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -5188,20 +5188,20 @@ define void @s_shuffle_v2p0_v4p0__3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -5235,18 +5235,18 @@ define void @s_shuffle_v2p0_v4p0__4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -5290,23 +5290,23 @@ define void @s_shuffle_v2p0_v4p0__5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -5347,22 +5347,22 @@ define void @s_shuffle_v2p0_v4p0__6_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -5393,17 +5393,17 @@ define void @s_shuffle_v2p0_v4p0__u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -5471,17 +5471,17 @@ define void @s_shuffle_v2p0_v4p0__2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -5530,17 +5530,17 @@ define void @s_shuffle_v2p0_v4p0__4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -5580,22 +5580,22 @@ define void @s_shuffle_v2p0_v4p0__5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -5636,22 +5636,22 @@ define void @s_shuffle_v2p0_v4p0__6_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -5696,17 +5696,17 @@ define void @s_shuffle_v2p0_v4p0__0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -5740,18 +5740,18 @@ define void @s_shuffle_v2p0_v4p0__1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -5781,17 +5781,17 @@ define void @s_shuffle_v2p0_v4p0__2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -5825,18 +5825,18 @@ define void @s_shuffle_v2p0_v4p0__3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -5888,20 +5888,20 @@ define void @s_shuffle_v2p0_v4p0__5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -5952,17 +5952,17 @@ define void @s_shuffle_v2p0_v4p0__u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -6003,21 +6003,21 @@ define void @s_shuffle_v2p0_v4p0__0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -6058,21 +6058,21 @@ define void @s_shuffle_v2p0_v4p0__1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -6113,22 +6113,22 @@ define void @s_shuffle_v2p0_v4p0__2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -6169,21 +6169,21 @@ define void @s_shuffle_v2p0_v4p0__3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -6214,17 +6214,17 @@ define void @s_shuffle_v2p0_v4p0__4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -6299,18 +6299,18 @@ define void @s_shuffle_v2p0_v4p0__u_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -6351,21 +6351,21 @@ define void @s_shuffle_v2p0_v4p0__0_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -6410,23 +6410,23 @@ define void @s_shuffle_v2p0_v4p0__1_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -6467,22 +6467,22 @@ define void @s_shuffle_v2p0_v4p0__2_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -6527,23 +6527,23 @@ define void @s_shuffle_v2p0_v4p0__3_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -6602,20 +6602,20 @@ define void @s_shuffle_v2p0_v4p0__5_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -6666,17 +6666,17 @@ define void @s_shuffle_v2p0_v4p0__u_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -6717,21 +6717,21 @@ define void @s_shuffle_v2p0_v4p0__0_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -6772,22 +6772,22 @@ define void @s_shuffle_v2p0_v4p0__1_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -6828,22 +6828,22 @@ define void @s_shuffle_v2p0_v4p0__2_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -6884,22 +6884,22 @@ define void @s_shuffle_v2p0_v4p0__3_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -6970,17 +6970,17 @@ define void @s_shuffle_v2p0_v4p0__6_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p0_v4p0__6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p0_v4p0__6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v2p3.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v2p3.ll index 6d5005a899832..299dfba482953 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v2p3.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v2p3.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v2p3_v2p3__u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v2p3_v2p3__0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v2p3__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v2p3__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -79,17 +79,17 @@ define void @v_shuffle_v2p3_v2p3__1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v2p3__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v2p3__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -132,17 +132,17 @@ define void @v_shuffle_v2p3_v2p3__3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v2p3__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v2p3__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <2 x i32> @@ -182,21 +182,21 @@ define void @v_shuffle_v2p3_v2p3__3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v2p3__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v2p3__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <2 x i32> @@ -235,21 +235,21 @@ define void @v_shuffle_v2p3_v2p3__3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v2p3__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v2p3__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <2 x i32> @@ -282,17 +282,17 @@ define void @v_shuffle_v2p3_v2p3__3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v2p3__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v2p3__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <2 x i32> @@ -325,17 +325,17 @@ define void @v_shuffle_v2p3_v2p3__3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v2p3__3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v2p3__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <2 x i32> @@ -368,17 +368,17 @@ define void @v_shuffle_v2p3_v2p3__u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v2p3__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v2p3__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -410,17 +410,17 @@ define void @v_shuffle_v2p3_v2p3__0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v2p3__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v2p3__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <2 x i32> zeroinitializer store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -452,17 +452,17 @@ define void @v_shuffle_v2p3_v2p3__1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v2p3__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v2p3__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -494,17 +494,17 @@ define void @v_shuffle_v2p3_v2p3__2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v2p3__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v2p3__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -534,16 +534,16 @@ define void @v_shuffle_v2p3_v2p3__u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v2p3__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v2p3__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -573,16 +573,16 @@ define void @v_shuffle_v2p3_v2p3__0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v2p3__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v2p3__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -614,17 +614,17 @@ define void @v_shuffle_v2p3_v2p3__1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v2p3__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v2p3__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -654,16 +654,16 @@ define void @v_shuffle_v2p3_v2p3__2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v2p3__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v2p3__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -704,16 +704,16 @@ define void @v_shuffle_v2p3_v2p3__0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v2p3__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v2p3__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -745,17 +745,17 @@ define void @v_shuffle_v2p3_v2p3__1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v2p3__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v2p3__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -796,16 +796,16 @@ define void @v_shuffle_v2p3_v2p3__u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v2p3__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v2p3__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <2 x i32> @@ -844,21 +844,21 @@ define void @v_shuffle_v2p3_v2p3__0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v2p3__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v2p3__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <2 x i32> @@ -897,21 +897,21 @@ define void @v_shuffle_v2p3_v2p3__1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v2p3__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v2p3__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <2 x i32> @@ -942,16 +942,16 @@ define void @v_shuffle_v2p3_v2p3__2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v2p3__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v2p3__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <2 x i32> @@ -996,17 +996,17 @@ define void @s_shuffle_v2p3_v2p3__0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v2p3__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v2p3__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -1038,17 +1038,17 @@ define void @s_shuffle_v2p3_v2p3__1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v2p3__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v2p3__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -1094,17 +1094,17 @@ define void @s_shuffle_v2p3_v2p3__3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v2p3__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v2p3__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <2 x i32> @@ -1145,21 +1145,21 @@ define void @s_shuffle_v2p3_v2p3__3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v2p3__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v2p3__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <2 x i32> @@ -1198,20 +1198,20 @@ define void @s_shuffle_v2p3_v2p3__3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v2p3__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v2p3__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <2 x i32> @@ -1246,18 +1246,18 @@ define void @s_shuffle_v2p3_v2p3__3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v2p3__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v2p3__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <2 x i32> @@ -1309,17 +1309,17 @@ define void @s_shuffle_v2p3_v2p3__u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v2p3__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v2p3__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -1371,18 +1371,18 @@ define void @s_shuffle_v2p3_v2p3__1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v2p3__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v2p3__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -1414,17 +1414,17 @@ define void @s_shuffle_v2p3_v2p3__2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v2p3__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v2p3__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -1454,17 +1454,17 @@ define void @s_shuffle_v2p3_v2p3__u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v2p3__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v2p3__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -1494,17 +1494,17 @@ define void @s_shuffle_v2p3_v2p3__0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v2p3__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v2p3__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -1552,17 +1552,17 @@ define void @s_shuffle_v2p3_v2p3__2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v2p3__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v2p3__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -1606,17 +1606,17 @@ define void @s_shuffle_v2p3_v2p3__0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v2p3__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v2p3__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -1648,17 +1648,17 @@ define void @s_shuffle_v2p3_v2p3__1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v2p3__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v2p3__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -1702,17 +1702,17 @@ define void @s_shuffle_v2p3_v2p3__u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v2p3__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v2p3__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <2 x i32> @@ -1751,20 +1751,20 @@ define void @s_shuffle_v2p3_v2p3__0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v2p3__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v2p3__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <2 x i32> @@ -1803,20 +1803,20 @@ define void @s_shuffle_v2p3_v2p3__1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v2p3__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v2p3__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <2 x i32> @@ -1847,17 +1847,17 @@ define void @s_shuffle_v2p3_v2p3__2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v2p3__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v2p3__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <2 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v3p3.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v3p3.ll index 2c8f2952fd106..13e3d94c35446 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v3p3.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v3p3.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v2p3_v3p3__u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v2p3_v3p3__0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -78,17 +78,17 @@ define void @v_shuffle_v2p3_v3p3__1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -120,17 +120,17 @@ define void @v_shuffle_v2p3_v3p3__2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -172,17 +172,17 @@ define void @v_shuffle_v2p3_v3p3__4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -215,17 +215,17 @@ define void @v_shuffle_v2p3_v3p3__5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -266,22 +266,22 @@ define void @v_shuffle_v2p3_v3p3__5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v5, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v5, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -320,21 +320,21 @@ define void @v_shuffle_v2p3_v3p3__5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -374,21 +374,21 @@ define void @v_shuffle_v2p3_v3p3__5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -422,18 +422,18 @@ define void @v_shuffle_v2p3_v3p3__5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -466,17 +466,17 @@ define void @v_shuffle_v2p3_v3p3__5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -510,18 +510,18 @@ define void @v_shuffle_v2p3_v3p3__5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -554,17 +554,17 @@ define void @v_shuffle_v2p3_v3p3__u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -596,17 +596,17 @@ define void @v_shuffle_v2p3_v3p3__0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <2 x i32> zeroinitializer store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -638,17 +638,17 @@ define void @v_shuffle_v2p3_v3p3__1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -681,18 +681,18 @@ define void @v_shuffle_v2p3_v3p3__2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -724,17 +724,17 @@ define void @v_shuffle_v2p3_v3p3__3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -772,21 +772,21 @@ define void @v_shuffle_v2p3_v3p3__4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -817,16 +817,16 @@ define void @v_shuffle_v2p3_v3p3__u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -856,16 +856,16 @@ define void @v_shuffle_v2p3_v3p3__0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -897,17 +897,17 @@ define void @v_shuffle_v2p3_v3p3__1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -939,17 +939,17 @@ define void @v_shuffle_v2p3_v3p3__2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -979,16 +979,16 @@ define void @v_shuffle_v2p3_v3p3__3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1026,21 +1026,21 @@ define void @v_shuffle_v2p3_v3p3__4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -1072,17 +1072,17 @@ define void @v_shuffle_v2p3_v3p3__u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1114,17 +1114,17 @@ define void @v_shuffle_v2p3_v3p3__0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1156,18 +1156,18 @@ define void @v_shuffle_v2p3_v3p3__1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1200,18 +1200,18 @@ define void @v_shuffle_v2p3_v3p3__2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1242,17 +1242,17 @@ define void @v_shuffle_v2p3_v3p3__3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1291,21 +1291,21 @@ define void @v_shuffle_v2p3_v3p3__4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -1347,16 +1347,16 @@ define void @v_shuffle_v2p3_v3p3__0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1387,17 +1387,17 @@ define void @v_shuffle_v2p3_v3p3__1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1429,17 +1429,17 @@ define void @v_shuffle_v2p3_v3p3__2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1482,17 +1482,17 @@ define void @v_shuffle_v2p3_v3p3__4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -1523,16 +1523,16 @@ define void @v_shuffle_v2p3_v3p3__u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -1571,21 +1571,21 @@ define void @v_shuffle_v2p3_v3p3__0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -1624,21 +1624,21 @@ define void @v_shuffle_v2p3_v3p3__1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v5, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v5, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -1677,21 +1677,21 @@ define void @v_shuffle_v2p3_v3p3__2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -1722,16 +1722,16 @@ define void @v_shuffle_v2p3_v3p3__3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -1764,17 +1764,17 @@ define void @v_shuffle_v2p3_v3p3__4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -1806,17 +1806,17 @@ define void @v_shuffle_v2p3_v3p3__u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -1855,21 +1855,21 @@ define void @v_shuffle_v2p3_v3p3__0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -1909,21 +1909,21 @@ define void @v_shuffle_v2p3_v3p3__1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -1963,21 +1963,21 @@ define void @v_shuffle_v2p3_v3p3__2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -2010,17 +2010,17 @@ define void @v_shuffle_v2p3_v3p3__3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -2053,18 +2053,18 @@ define void @v_shuffle_v2p3_v3p3__4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v3p3__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v3p3__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v3, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -2109,17 +2109,17 @@ define void @s_shuffle_v2p3_v3p3__0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v3p3__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v3p3__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -2151,17 +2151,17 @@ define void @s_shuffle_v2p3_v3p3__1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v3p3__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v3p3__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -2193,17 +2193,17 @@ define void @s_shuffle_v2p3_v3p3__2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v3p3__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v3p3__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -2249,17 +2249,17 @@ define void @s_shuffle_v2p3_v3p3__4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v3p3__4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v3p3__4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -2292,17 +2292,17 @@ define void @s_shuffle_v2p3_v3p3__5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v3p3__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v3p3__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -2343,21 +2343,21 @@ define void @s_shuffle_v2p3_v3p3__5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v3p3__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v3p3__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -2396,20 +2396,20 @@ define void @s_shuffle_v2p3_v3p3__5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v3p3__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v3p3__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -2450,21 +2450,21 @@ define void @s_shuffle_v2p3_v3p3__5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v3p3__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v3p3__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -2499,18 +2499,18 @@ define void @s_shuffle_v2p3_v3p3__5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v3p3__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v3p3__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -2564,18 +2564,18 @@ define void @s_shuffle_v2p3_v3p3__5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v3p3__5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v3p3__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -2608,17 +2608,17 @@ define void @s_shuffle_v2p3_v3p3__u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v3p3__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v3p3__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -2670,18 +2670,18 @@ define void @s_shuffle_v2p3_v3p3__1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v3p3__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v3p3__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -2715,18 +2715,18 @@ define void @s_shuffle_v2p3_v3p3__2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v3p3__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v3p3__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -2758,17 +2758,17 @@ define void @s_shuffle_v2p3_v3p3__3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v3p3__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v3p3__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -2808,21 +2808,21 @@ define void @s_shuffle_v2p3_v3p3__4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v3p3__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v3p3__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -2853,17 +2853,17 @@ define void @s_shuffle_v2p3_v3p3__u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v3p3__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v3p3__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -2893,17 +2893,17 @@ define void @s_shuffle_v2p3_v3p3__0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v3p3__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v3p3__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -2969,17 +2969,17 @@ define void @s_shuffle_v2p3_v3p3__3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v3p3__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v3p3__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -3017,20 +3017,20 @@ define void @s_shuffle_v2p3_v3p3__4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v3p3__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v3p3__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -3063,17 +3063,17 @@ define void @s_shuffle_v2p3_v3p3__u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v3p3__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v3p3__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -3125,18 +3125,18 @@ define void @s_shuffle_v2p3_v3p3__1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v3p3__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v3p3__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -3170,18 +3170,18 @@ define void @s_shuffle_v2p3_v3p3__2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v3p3__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v3p3__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -3213,17 +3213,17 @@ define void @s_shuffle_v2p3_v3p3__3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v3p3__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v3p3__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -3263,21 +3263,21 @@ define void @s_shuffle_v2p3_v3p3__4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v3p3__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v3p3__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -3322,17 +3322,17 @@ define void @s_shuffle_v2p3_v3p3__0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v3p3__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v3p3__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -3364,17 +3364,17 @@ define void @s_shuffle_v2p3_v3p3__1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v3p3__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v3p3__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -3406,17 +3406,17 @@ define void @s_shuffle_v2p3_v3p3__2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v3p3__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v3p3__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -3464,18 +3464,18 @@ define void @s_shuffle_v2p3_v3p3__4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v3p3__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v3p3__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -3506,17 +3506,17 @@ define void @s_shuffle_v2p3_v3p3__u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v3p3__u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v3p3__u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -3555,20 +3555,20 @@ define void @s_shuffle_v2p3_v3p3__0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v3p3__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v3p3__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -3607,20 +3607,20 @@ define void @s_shuffle_v2p3_v3p3__1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v3p3__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v3p3__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -3659,20 +3659,20 @@ define void @s_shuffle_v2p3_v3p3__2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v3p3__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v3p3__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -3703,17 +3703,17 @@ define void @s_shuffle_v2p3_v3p3__3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v3p3__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v3p3__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -3765,17 +3765,17 @@ define void @s_shuffle_v2p3_v3p3__u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v3p3__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v3p3__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -3814,20 +3814,20 @@ define void @s_shuffle_v2p3_v3p3__0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v3p3__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v3p3__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -3868,21 +3868,21 @@ define void @s_shuffle_v2p3_v3p3__1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v3p3__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v3p3__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -3923,21 +3923,21 @@ define void @s_shuffle_v2p3_v3p3__2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v3p3__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v3p3__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> @@ -3991,18 +3991,18 @@ define void @s_shuffle_v2p3_v3p3__4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v3p3__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v3p3__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <2 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v4p3.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v4p3.ll index 20abdd10f949e..a9085502c7358 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v4p3.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v4p3.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v2p3_v4p3__u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v2p3_v4p3__0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -78,17 +78,17 @@ define void @v_shuffle_v2p3_v4p3__1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -118,16 +118,16 @@ define void @v_shuffle_v2p3_v4p3__2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -159,17 +159,17 @@ define void @v_shuffle_v2p3_v4p3__3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -211,17 +211,17 @@ define void @v_shuffle_v2p3_v4p3__5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -252,16 +252,16 @@ define void @v_shuffle_v2p3_v4p3__6_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -294,17 +294,17 @@ define void @v_shuffle_v2p3_v4p3__7_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -344,21 +344,21 @@ define void @v_shuffle_v2p3_v4p3__7_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -397,21 +397,21 @@ define void @v_shuffle_v2p3_v4p3__7_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -450,21 +450,21 @@ define void @v_shuffle_v2p3_v4p3__7_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -503,21 +503,21 @@ define void @v_shuffle_v2p3_v4p3__7_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -552,18 +552,18 @@ define void @v_shuffle_v2p3_v4p3__7_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -596,17 +596,17 @@ define void @v_shuffle_v2p3_v4p3__7_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -639,17 +639,17 @@ define void @v_shuffle_v2p3_v4p3__7_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -682,17 +682,17 @@ define void @v_shuffle_v2p3_v4p3__7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -725,17 +725,17 @@ define void @v_shuffle_v2p3_v4p3__u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -767,17 +767,17 @@ define void @v_shuffle_v2p3_v4p3__0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> zeroinitializer store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -809,17 +809,17 @@ define void @v_shuffle_v2p3_v4p3__1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -851,17 +851,17 @@ define void @v_shuffle_v2p3_v4p3__2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -895,18 +895,18 @@ define void @v_shuffle_v2p3_v4p3__3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -938,17 +938,17 @@ define void @v_shuffle_v2p3_v4p3__4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -986,21 +986,21 @@ define void @v_shuffle_v2p3_v4p3__5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -1039,21 +1039,21 @@ define void @v_shuffle_v2p3_v4p3__6_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -1084,16 +1084,16 @@ define void @v_shuffle_v2p3_v4p3__u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1123,16 +1123,16 @@ define void @v_shuffle_v2p3_v4p3__0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1164,17 +1164,17 @@ define void @v_shuffle_v2p3_v4p3__1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1206,17 +1206,17 @@ define void @v_shuffle_v2p3_v4p3__2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1248,17 +1248,17 @@ define void @v_shuffle_v2p3_v4p3__3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1288,16 +1288,16 @@ define void @v_shuffle_v2p3_v4p3__4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1335,21 +1335,21 @@ define void @v_shuffle_v2p3_v4p3__5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -1388,21 +1388,21 @@ define void @v_shuffle_v2p3_v4p3__6_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -1434,17 +1434,17 @@ define void @v_shuffle_v2p3_v4p3__u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1476,17 +1476,17 @@ define void @v_shuffle_v2p3_v4p3__0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1518,18 +1518,18 @@ define void @v_shuffle_v2p3_v4p3__1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1561,17 +1561,17 @@ define void @v_shuffle_v2p3_v4p3__2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1603,17 +1603,17 @@ define void @v_shuffle_v2p3_v4p3__3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1644,17 +1644,17 @@ define void @v_shuffle_v2p3_v4p3__4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1692,21 +1692,21 @@ define void @v_shuffle_v2p3_v4p3__5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -1745,21 +1745,21 @@ define void @v_shuffle_v2p3_v4p3__6_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -1790,16 +1790,16 @@ define void @v_shuffle_v2p3_v4p3__u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1831,17 +1831,17 @@ define void @v_shuffle_v2p3_v4p3__0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1873,17 +1873,17 @@ define void @v_shuffle_v2p3_v4p3__1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1913,16 +1913,16 @@ define void @v_shuffle_v2p3_v4p3__2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1954,17 +1954,17 @@ define void @v_shuffle_v2p3_v4p3__3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1994,16 +1994,16 @@ define void @v_shuffle_v2p3_v4p3__4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2041,21 +2041,21 @@ define void @v_shuffle_v2p3_v4p3__5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -2094,21 +2094,21 @@ define void @v_shuffle_v2p3_v4p3__6_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -2150,16 +2150,16 @@ define void @v_shuffle_v2p3_v4p3__0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2190,17 +2190,17 @@ define void @v_shuffle_v2p3_v4p3__1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2230,16 +2230,16 @@ define void @v_shuffle_v2p3_v4p3__2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2271,17 +2271,17 @@ define void @v_shuffle_v2p3_v4p3__3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2324,17 +2324,17 @@ define void @v_shuffle_v2p3_v4p3__5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -2367,17 +2367,17 @@ define void @v_shuffle_v2p3_v4p3__6_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -2408,16 +2408,16 @@ define void @v_shuffle_v2p3_v4p3__u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -2456,21 +2456,21 @@ define void @v_shuffle_v2p3_v4p3__0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -2509,21 +2509,21 @@ define void @v_shuffle_v2p3_v4p3__1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -2562,21 +2562,21 @@ define void @v_shuffle_v2p3_v4p3__2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -2615,21 +2615,21 @@ define void @v_shuffle_v2p3_v4p3__3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -2660,16 +2660,16 @@ define void @v_shuffle_v2p3_v4p3__4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -2702,17 +2702,17 @@ define void @v_shuffle_v2p3_v4p3__5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -2745,17 +2745,17 @@ define void @v_shuffle_v2p3_v4p3__6_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -2787,17 +2787,17 @@ define void @v_shuffle_v2p3_v4p3__u_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -2836,21 +2836,21 @@ define void @v_shuffle_v2p3_v4p3__0_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -2889,21 +2889,21 @@ define void @v_shuffle_v2p3_v4p3__1_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -2942,21 +2942,21 @@ define void @v_shuffle_v2p3_v4p3__2_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -2995,21 +2995,21 @@ define void @v_shuffle_v2p3_v4p3__3_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[6:7] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[6:7] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -3042,17 +3042,17 @@ define void @v_shuffle_v2p3_v4p3__4_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -3085,18 +3085,18 @@ define void @v_shuffle_v2p3_v4p3__5_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -3129,17 +3129,17 @@ define void @v_shuffle_v2p3_v4p3__6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -3170,16 +3170,16 @@ define void @v_shuffle_v2p3_v4p3__u_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -3218,21 +3218,21 @@ define void @v_shuffle_v2p3_v4p3__0_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -3271,21 +3271,21 @@ define void @v_shuffle_v2p3_v4p3__1_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -3324,21 +3324,21 @@ define void @v_shuffle_v2p3_v4p3__2_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -3377,21 +3377,21 @@ define void @v_shuffle_v2p3_v4p3__3_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -3424,17 +3424,17 @@ define void @v_shuffle_v2p3_v4p3__4_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -3467,17 +3467,17 @@ define void @v_shuffle_v2p3_v4p3__5_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -3508,16 +3508,16 @@ define void @v_shuffle_v2p3_v4p3__6_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v4p3__6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v4p3__6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -3562,17 +3562,17 @@ define void @s_shuffle_v2p3_v4p3__0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -3604,17 +3604,17 @@ define void @s_shuffle_v2p3_v4p3__1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -3646,17 +3646,17 @@ define void @s_shuffle_v2p3_v4p3__2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -3688,17 +3688,17 @@ define void @s_shuffle_v2p3_v4p3__3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -3744,17 +3744,17 @@ define void @s_shuffle_v2p3_v4p3__5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -3787,17 +3787,17 @@ define void @s_shuffle_v2p3_v4p3__6_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -3830,17 +3830,17 @@ define void @s_shuffle_v2p3_v4p3__7_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -3881,21 +3881,21 @@ define void @s_shuffle_v2p3_v4p3__7_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -3934,20 +3934,20 @@ define void @s_shuffle_v2p3_v4p3__7_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -3988,21 +3988,21 @@ define void @s_shuffle_v2p3_v4p3__7_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -4043,21 +4043,21 @@ define void @s_shuffle_v2p3_v4p3__7_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -4092,18 +4092,18 @@ define void @s_shuffle_v2p3_v4p3__7_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -4157,18 +4157,18 @@ define void @s_shuffle_v2p3_v4p3__7_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -4203,18 +4203,18 @@ define void @s_shuffle_v2p3_v4p3__7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -4247,17 +4247,17 @@ define void @s_shuffle_v2p3_v4p3__u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -4309,18 +4309,18 @@ define void @s_shuffle_v2p3_v4p3__1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -4354,18 +4354,18 @@ define void @s_shuffle_v2p3_v4p3__2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s0 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s0 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -4399,18 +4399,18 @@ define void @s_shuffle_v2p3_v4p3__3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -4442,17 +4442,17 @@ define void @s_shuffle_v2p3_v4p3__4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -4492,21 +4492,21 @@ define void @s_shuffle_v2p3_v4p3__5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -4547,21 +4547,21 @@ define void @s_shuffle_v2p3_v4p3__6_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s0 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s0 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -4592,17 +4592,17 @@ define void @s_shuffle_v2p3_v4p3__u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -4632,17 +4632,17 @@ define void @s_shuffle_v2p3_v4p3__0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -4694,18 +4694,18 @@ define void @s_shuffle_v2p3_v4p3__2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -4753,17 +4753,17 @@ define void @s_shuffle_v2p3_v4p3__4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -4801,20 +4801,20 @@ define void @s_shuffle_v2p3_v4p3__5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -4855,21 +4855,21 @@ define void @s_shuffle_v2p3_v4p3__6_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -4902,17 +4902,17 @@ define void @s_shuffle_v2p3_v4p3__u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -4964,18 +4964,18 @@ define void @s_shuffle_v2p3_v4p3__1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -5009,18 +5009,18 @@ define void @s_shuffle_v2p3_v4p3__2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s2 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s2 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -5054,18 +5054,18 @@ define void @s_shuffle_v2p3_v4p3__3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -5097,17 +5097,17 @@ define void @s_shuffle_v2p3_v4p3__4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -5147,21 +5147,21 @@ define void @s_shuffle_v2p3_v4p3__5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -5202,21 +5202,21 @@ define void @s_shuffle_v2p3_v4p3__6_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s2 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s2 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -5249,17 +5249,17 @@ define void @s_shuffle_v2p3_v4p3__u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -5311,18 +5311,18 @@ define void @s_shuffle_v2p3_v4p3__1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -5354,17 +5354,17 @@ define void @s_shuffle_v2p3_v4p3__2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -5398,18 +5398,18 @@ define void @s_shuffle_v2p3_v4p3__3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -5441,17 +5441,17 @@ define void @s_shuffle_v2p3_v4p3__4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -5491,21 +5491,21 @@ define void @s_shuffle_v2p3_v4p3__5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -5546,21 +5546,21 @@ define void @s_shuffle_v2p3_v4p3__6_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -5605,17 +5605,17 @@ define void @s_shuffle_v2p3_v4p3__0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -5647,17 +5647,17 @@ define void @s_shuffle_v2p3_v4p3__1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -5689,17 +5689,17 @@ define void @s_shuffle_v2p3_v4p3__2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -5731,17 +5731,17 @@ define void @s_shuffle_v2p3_v4p3__3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -5789,18 +5789,18 @@ define void @s_shuffle_v2p3_v4p3__5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -5835,18 +5835,18 @@ define void @s_shuffle_v2p3_v4p3__6_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s0 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s0 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -5877,17 +5877,17 @@ define void @s_shuffle_v2p3_v4p3__u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -5926,20 +5926,20 @@ define void @s_shuffle_v2p3_v4p3__0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -5978,20 +5978,20 @@ define void @s_shuffle_v2p3_v4p3__1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -6032,21 +6032,21 @@ define void @s_shuffle_v2p3_v4p3__2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -6085,20 +6085,20 @@ define void @s_shuffle_v2p3_v4p3__3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -6129,17 +6129,17 @@ define void @s_shuffle_v2p3_v4p3__4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -6193,18 +6193,18 @@ define void @s_shuffle_v2p3_v4p3__6_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -6237,17 +6237,17 @@ define void @s_shuffle_v2p3_v4p3__u_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -6286,20 +6286,20 @@ define void @s_shuffle_v2p3_v4p3__0_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -6340,21 +6340,21 @@ define void @s_shuffle_v2p3_v4p3__1_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -6395,21 +6395,21 @@ define void @s_shuffle_v2p3_v4p3__2_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s6 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s6 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -6450,21 +6450,21 @@ define void @s_shuffle_v2p3_v4p3__3_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -6518,18 +6518,18 @@ define void @s_shuffle_v2p3_v4p3__5_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -6564,18 +6564,18 @@ define void @s_shuffle_v2p3_v4p3__6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s2 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s2 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -6608,17 +6608,17 @@ define void @s_shuffle_v2p3_v4p3__u_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -6657,20 +6657,20 @@ define void @s_shuffle_v2p3_v4p3__0_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -6711,21 +6711,21 @@ define void @s_shuffle_v2p3_v4p3__1_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -6766,21 +6766,21 @@ define void @s_shuffle_v2p3_v4p3__2_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -6821,21 +6821,21 @@ define void @s_shuffle_v2p3_v4p3__3_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -6889,18 +6889,18 @@ define void @s_shuffle_v2p3_v4p3__5_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> @@ -6933,17 +6933,17 @@ define void @s_shuffle_v2p3_v4p3__6_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v4p3__6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v4p3__6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <2 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v8p3.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v8p3.ll index df7bdbf04d4e3..9174e92cd9c82 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v8p3.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v8p3.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v2p3_v8p3__u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v2p3_v8p3__0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -78,17 +78,17 @@ define void @v_shuffle_v2p3_v8p3__1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -118,16 +118,16 @@ define void @v_shuffle_v2p3_v8p3__2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -158,17 +158,17 @@ define void @v_shuffle_v2p3_v8p3__3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -198,16 +198,16 @@ define void @v_shuffle_v2p3_v8p3__4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -238,17 +238,17 @@ define void @v_shuffle_v2p3_v8p3__5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -278,16 +278,16 @@ define void @v_shuffle_v2p3_v8p3__6_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -319,17 +319,17 @@ define void @v_shuffle_v2p3_v8p3__7_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -371,17 +371,17 @@ define void @v_shuffle_v2p3_v8p3__9_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__9_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__9_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -412,16 +412,16 @@ define void @v_shuffle_v2p3_v8p3__10_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__10_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__10_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -453,17 +453,17 @@ define void @v_shuffle_v2p3_v8p3__11_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__11_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__11_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -494,16 +494,16 @@ define void @v_shuffle_v2p3_v8p3__12_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__12_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__12_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -535,17 +535,17 @@ define void @v_shuffle_v2p3_v8p3__13_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__13_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__13_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -576,16 +576,16 @@ define void @v_shuffle_v2p3_v8p3__14_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__14_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__14_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -618,17 +618,17 @@ define void @v_shuffle_v2p3_v8p3__15_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__15_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__15_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -668,21 +668,21 @@ define void @v_shuffle_v2p3_v8p3__15_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__15_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[8:9], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__15_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[8:9], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -721,21 +721,21 @@ define void @v_shuffle_v2p3_v8p3__15_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__15_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v9 -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__15_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v9 +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -774,21 +774,21 @@ define void @v_shuffle_v2p3_v8p3__15_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__15_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[10:11], v[2:3] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__15_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[10:11], v[2:3] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -827,21 +827,21 @@ define void @v_shuffle_v2p3_v8p3__15_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__15_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v11 -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__15_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v11 +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -880,21 +880,21 @@ define void @v_shuffle_v2p3_v8p3__15_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__15_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[12:13], v[4:5] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__15_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[12:13], v[4:5] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -933,21 +933,21 @@ define void @v_shuffle_v2p3_v8p3__15_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__15_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v13 -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__15_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v13 +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -986,21 +986,21 @@ define void @v_shuffle_v2p3_v8p3__15_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__15_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[14:15], v[6:7] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__15_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[14:15], v[6:7] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -1039,21 +1039,21 @@ define void @v_shuffle_v2p3_v8p3__15_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__15_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v15 -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__15_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v15 +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -1088,18 +1088,18 @@ define void @v_shuffle_v2p3_v8p3__15_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__15_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__15_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -1132,17 +1132,17 @@ define void @v_shuffle_v2p3_v8p3__15_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__15_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__15_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -1176,18 +1176,18 @@ define void @v_shuffle_v2p3_v8p3__15_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__15_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__15_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -1220,17 +1220,17 @@ define void @v_shuffle_v2p3_v8p3__15_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__15_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__15_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -1264,18 +1264,18 @@ define void @v_shuffle_v2p3_v8p3__15_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__15_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__15_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -1308,17 +1308,17 @@ define void @v_shuffle_v2p3_v8p3__15_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__15_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__15_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -1351,17 +1351,17 @@ define void @v_shuffle_v2p3_v8p3__15_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__15_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[6:7] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__15_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[6:7] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -1394,17 +1394,17 @@ define void @v_shuffle_v2p3_v8p3__15_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__15_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__15_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -1437,17 +1437,17 @@ define void @v_shuffle_v2p3_v8p3__u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1479,17 +1479,17 @@ define void @v_shuffle_v2p3_v8p3__0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> zeroinitializer store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1521,17 +1521,17 @@ define void @v_shuffle_v2p3_v8p3__1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1563,17 +1563,17 @@ define void @v_shuffle_v2p3_v8p3__2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1606,18 +1606,18 @@ define void @v_shuffle_v2p3_v8p3__3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1649,17 +1649,17 @@ define void @v_shuffle_v2p3_v8p3__4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1692,18 +1692,18 @@ define void @v_shuffle_v2p3_v8p3__5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1735,17 +1735,17 @@ define void @v_shuffle_v2p3_v8p3__6_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1779,18 +1779,18 @@ define void @v_shuffle_v2p3_v8p3__7_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1822,17 +1822,17 @@ define void @v_shuffle_v2p3_v8p3__8_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__8_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__8_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1870,21 +1870,21 @@ define void @v_shuffle_v2p3_v8p3__9_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__9_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__9_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -1923,21 +1923,21 @@ define void @v_shuffle_v2p3_v8p3__10_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__10_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__10_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -1976,21 +1976,21 @@ define void @v_shuffle_v2p3_v8p3__11_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__11_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__11_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -2029,21 +2029,21 @@ define void @v_shuffle_v2p3_v8p3__12_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__12_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v0 -; GFX940-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__12_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v0 +; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -2082,21 +2082,21 @@ define void @v_shuffle_v2p3_v8p3__13_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__13_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__13_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -2135,21 +2135,21 @@ define void @v_shuffle_v2p3_v8p3__14_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__14_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v9, v0 -; GFX940-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__14_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v9, v0 +; GFX942-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -2180,16 +2180,16 @@ define void @v_shuffle_v2p3_v8p3__u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2219,16 +2219,16 @@ define void @v_shuffle_v2p3_v8p3__0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2260,17 +2260,17 @@ define void @v_shuffle_v2p3_v8p3__1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2302,17 +2302,17 @@ define void @v_shuffle_v2p3_v8p3__2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2344,17 +2344,17 @@ define void @v_shuffle_v2p3_v8p3__3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2386,17 +2386,17 @@ define void @v_shuffle_v2p3_v8p3__4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2428,17 +2428,17 @@ define void @v_shuffle_v2p3_v8p3__5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2470,17 +2470,17 @@ define void @v_shuffle_v2p3_v8p3__6_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2512,17 +2512,17 @@ define void @v_shuffle_v2p3_v8p3__7_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2552,16 +2552,16 @@ define void @v_shuffle_v2p3_v8p3__8_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__8_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__8_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2599,21 +2599,21 @@ define void @v_shuffle_v2p3_v8p3__9_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__9_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__9_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -2652,21 +2652,21 @@ define void @v_shuffle_v2p3_v8p3__10_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__10_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__10_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -2705,21 +2705,21 @@ define void @v_shuffle_v2p3_v8p3__11_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__11_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__11_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -2758,21 +2758,21 @@ define void @v_shuffle_v2p3_v8p3__12_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__12_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__12_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -2811,21 +2811,21 @@ define void @v_shuffle_v2p3_v8p3__13_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__13_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__13_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -2864,21 +2864,21 @@ define void @v_shuffle_v2p3_v8p3__14_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__14_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v9, v1 -; GFX940-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__14_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v9, v1 +; GFX942-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -2910,17 +2910,17 @@ define void @v_shuffle_v2p3_v8p3__u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2952,17 +2952,17 @@ define void @v_shuffle_v2p3_v8p3__0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2994,18 +2994,18 @@ define void @v_shuffle_v2p3_v8p3__1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3037,17 +3037,17 @@ define void @v_shuffle_v2p3_v8p3__2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3079,17 +3079,17 @@ define void @v_shuffle_v2p3_v8p3__3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3121,17 +3121,17 @@ define void @v_shuffle_v2p3_v8p3__4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3164,18 +3164,18 @@ define void @v_shuffle_v2p3_v8p3__5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3207,17 +3207,17 @@ define void @v_shuffle_v2p3_v8p3__6_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3250,18 +3250,18 @@ define void @v_shuffle_v2p3_v8p3__7_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3292,17 +3292,17 @@ define void @v_shuffle_v2p3_v8p3__8_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__8_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__8_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3340,21 +3340,21 @@ define void @v_shuffle_v2p3_v8p3__9_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__9_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__9_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -3393,21 +3393,21 @@ define void @v_shuffle_v2p3_v8p3__10_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__10_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v2 -; GFX940-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__10_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v2 +; GFX942-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -3446,21 +3446,21 @@ define void @v_shuffle_v2p3_v8p3__11_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__11_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__11_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -3499,21 +3499,21 @@ define void @v_shuffle_v2p3_v8p3__12_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__12_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v9, v2 -; GFX940-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__12_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v9, v2 +; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -3552,21 +3552,21 @@ define void @v_shuffle_v2p3_v8p3__13_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__13_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[8:9], v[2:3] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__13_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[8:9], v[2:3] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -3605,21 +3605,21 @@ define void @v_shuffle_v2p3_v8p3__14_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__14_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v11, v2 -; GFX940-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__14_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v11, v2 +; GFX942-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -3650,16 +3650,16 @@ define void @v_shuffle_v2p3_v8p3__u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3691,17 +3691,17 @@ define void @v_shuffle_v2p3_v8p3__0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3733,17 +3733,17 @@ define void @v_shuffle_v2p3_v8p3__1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3773,16 +3773,16 @@ define void @v_shuffle_v2p3_v8p3__2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3814,17 +3814,17 @@ define void @v_shuffle_v2p3_v8p3__3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3856,17 +3856,17 @@ define void @v_shuffle_v2p3_v8p3__4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3898,17 +3898,17 @@ define void @v_shuffle_v2p3_v8p3__5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3940,17 +3940,17 @@ define void @v_shuffle_v2p3_v8p3__6_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3982,17 +3982,17 @@ define void @v_shuffle_v2p3_v8p3__7_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4022,16 +4022,16 @@ define void @v_shuffle_v2p3_v8p3__8_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__8_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__8_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4069,21 +4069,21 @@ define void @v_shuffle_v2p3_v8p3__9_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__9_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__9_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -4122,21 +4122,21 @@ define void @v_shuffle_v2p3_v8p3__10_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__10_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__10_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -4175,21 +4175,21 @@ define void @v_shuffle_v2p3_v8p3__11_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__11_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__11_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -4228,21 +4228,21 @@ define void @v_shuffle_v2p3_v8p3__12_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__12_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v9, v3 -; GFX940-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__12_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v9, v3 +; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -4281,21 +4281,21 @@ define void @v_shuffle_v2p3_v8p3__13_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__13_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v9 -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__13_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v9 +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -4334,21 +4334,21 @@ define void @v_shuffle_v2p3_v8p3__14_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__14_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v11, v3 -; GFX940-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__14_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v11, v3 +; GFX942-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -4380,17 +4380,17 @@ define void @v_shuffle_v2p3_v8p3__u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4422,17 +4422,17 @@ define void @v_shuffle_v2p3_v8p3__0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4465,18 +4465,18 @@ define void @v_shuffle_v2p3_v8p3__1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4508,17 +4508,17 @@ define void @v_shuffle_v2p3_v8p3__2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4550,18 +4550,18 @@ define void @v_shuffle_v2p3_v8p3__3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4593,17 +4593,17 @@ define void @v_shuffle_v2p3_v8p3__4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4635,17 +4635,17 @@ define void @v_shuffle_v2p3_v8p3__5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4677,17 +4677,17 @@ define void @v_shuffle_v2p3_v8p3__6_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4720,18 +4720,18 @@ define void @v_shuffle_v2p3_v8p3__7_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4762,17 +4762,17 @@ define void @v_shuffle_v2p3_v8p3__8_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__8_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__8_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4810,21 +4810,21 @@ define void @v_shuffle_v2p3_v8p3__9_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__9_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[4:5] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__9_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[4:5] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -4863,21 +4863,21 @@ define void @v_shuffle_v2p3_v8p3__10_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__10_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v9, v4 -; GFX940-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__10_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v9, v4 +; GFX942-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -4916,21 +4916,21 @@ define void @v_shuffle_v2p3_v8p3__11_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__11_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[8:9], v[4:5] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__11_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[8:9], v[4:5] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -4969,21 +4969,21 @@ define void @v_shuffle_v2p3_v8p3__12_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__12_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v11, v4 -; GFX940-NEXT: global_store_dwordx2 v14, v[10:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__12_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v11, v4 +; GFX942-NEXT: global_store_dwordx2 v14, v[10:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -5022,21 +5022,21 @@ define void @v_shuffle_v2p3_v8p3__13_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__13_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[10:11], v[4:5] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__13_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[10:11], v[4:5] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -5075,21 +5075,21 @@ define void @v_shuffle_v2p3_v8p3__14_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__14_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v13, v4 -; GFX940-NEXT: global_store_dwordx2 v14, v[12:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__14_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v13, v4 +; GFX942-NEXT: global_store_dwordx2 v14, v[12:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -5120,16 +5120,16 @@ define void @v_shuffle_v2p3_v8p3__u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5161,17 +5161,17 @@ define void @v_shuffle_v2p3_v8p3__0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5203,17 +5203,17 @@ define void @v_shuffle_v2p3_v8p3__1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5245,17 +5245,17 @@ define void @v_shuffle_v2p3_v8p3__2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5287,17 +5287,17 @@ define void @v_shuffle_v2p3_v8p3__3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5327,16 +5327,16 @@ define void @v_shuffle_v2p3_v8p3__4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5368,17 +5368,17 @@ define void @v_shuffle_v2p3_v8p3__5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5410,17 +5410,17 @@ define void @v_shuffle_v2p3_v8p3__6_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5452,17 +5452,17 @@ define void @v_shuffle_v2p3_v8p3__7_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5492,16 +5492,16 @@ define void @v_shuffle_v2p3_v8p3__8_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__8_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__8_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5539,21 +5539,21 @@ define void @v_shuffle_v2p3_v8p3__9_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__9_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__9_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -5592,21 +5592,21 @@ define void @v_shuffle_v2p3_v8p3__10_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__10_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v9, v5 -; GFX940-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__10_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v9, v5 +; GFX942-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -5645,21 +5645,21 @@ define void @v_shuffle_v2p3_v8p3__11_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__11_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v9 -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__11_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v9 +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -5698,21 +5698,21 @@ define void @v_shuffle_v2p3_v8p3__12_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__12_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v11, v5 -; GFX940-NEXT: global_store_dwordx2 v14, v[10:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__12_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v11, v5 +; GFX942-NEXT: global_store_dwordx2 v14, v[10:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -5751,21 +5751,21 @@ define void @v_shuffle_v2p3_v8p3__13_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__13_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v11 -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__13_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v11 +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -5804,21 +5804,21 @@ define void @v_shuffle_v2p3_v8p3__14_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__14_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v13, v5 -; GFX940-NEXT: global_store_dwordx2 v14, v[12:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__14_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v13, v5 +; GFX942-NEXT: global_store_dwordx2 v14, v[12:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -5850,17 +5850,17 @@ define void @v_shuffle_v2p3_v8p3__u_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5892,17 +5892,17 @@ define void @v_shuffle_v2p3_v8p3__0_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5935,18 +5935,18 @@ define void @v_shuffle_v2p3_v8p3__1_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5978,17 +5978,17 @@ define void @v_shuffle_v2p3_v8p3__2_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6021,18 +6021,18 @@ define void @v_shuffle_v2p3_v8p3__3_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6064,17 +6064,17 @@ define void @v_shuffle_v2p3_v8p3__4_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6106,18 +6106,18 @@ define void @v_shuffle_v2p3_v8p3__5_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6149,17 +6149,17 @@ define void @v_shuffle_v2p3_v8p3__6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6191,17 +6191,17 @@ define void @v_shuffle_v2p3_v8p3__7_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[6:7] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[6:7] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6232,17 +6232,17 @@ define void @v_shuffle_v2p3_v8p3__8_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__8_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__8_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6280,21 +6280,21 @@ define void @v_shuffle_v2p3_v8p3__9_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__9_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[8:9], v[6:7] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__9_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[8:9], v[6:7] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -6333,21 +6333,21 @@ define void @v_shuffle_v2p3_v8p3__10_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__10_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v11, v6 -; GFX940-NEXT: global_store_dwordx2 v16, v[10:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__10_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v11, v6 +; GFX942-NEXT: global_store_dwordx2 v16, v[10:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -6386,21 +6386,21 @@ define void @v_shuffle_v2p3_v8p3__11_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__11_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[10:11], v[6:7] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__11_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[10:11], v[6:7] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -6439,21 +6439,21 @@ define void @v_shuffle_v2p3_v8p3__12_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__12_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v13, v6 -; GFX940-NEXT: global_store_dwordx2 v16, v[12:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__12_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v13, v6 +; GFX942-NEXT: global_store_dwordx2 v16, v[12:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -6492,21 +6492,21 @@ define void @v_shuffle_v2p3_v8p3__13_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__13_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[12:13], v[6:7] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__13_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[12:13], v[6:7] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -6545,21 +6545,21 @@ define void @v_shuffle_v2p3_v8p3__14_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__14_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v15, v6 -; GFX940-NEXT: global_store_dwordx2 v16, v[14:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__14_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v15, v6 +; GFX942-NEXT: global_store_dwordx2 v16, v[14:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -6590,16 +6590,16 @@ define void @v_shuffle_v2p3_v8p3__u_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6631,17 +6631,17 @@ define void @v_shuffle_v2p3_v8p3__0_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6673,17 +6673,17 @@ define void @v_shuffle_v2p3_v8p3__1_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6715,17 +6715,17 @@ define void @v_shuffle_v2p3_v8p3__2_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6757,17 +6757,17 @@ define void @v_shuffle_v2p3_v8p3__3_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6799,17 +6799,17 @@ define void @v_shuffle_v2p3_v8p3__4_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6841,17 +6841,17 @@ define void @v_shuffle_v2p3_v8p3__5_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6881,16 +6881,16 @@ define void @v_shuffle_v2p3_v8p3__6_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6922,17 +6922,17 @@ define void @v_shuffle_v2p3_v8p3__7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -6962,16 +6962,16 @@ define void @v_shuffle_v2p3_v8p3__8_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__8_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__8_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7009,21 +7009,21 @@ define void @v_shuffle_v2p3_v8p3__9_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__9_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v9 -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__9_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v9 +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -7062,21 +7062,21 @@ define void @v_shuffle_v2p3_v8p3__10_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__10_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v11, v7 -; GFX940-NEXT: global_store_dwordx2 v16, v[10:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__10_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v11, v7 +; GFX942-NEXT: global_store_dwordx2 v16, v[10:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -7115,21 +7115,21 @@ define void @v_shuffle_v2p3_v8p3__11_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__11_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v11 -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__11_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v11 +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -7168,21 +7168,21 @@ define void @v_shuffle_v2p3_v8p3__12_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__12_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v13, v7 -; GFX940-NEXT: global_store_dwordx2 v16, v[12:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__12_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v13, v7 +; GFX942-NEXT: global_store_dwordx2 v16, v[12:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -7221,21 +7221,21 @@ define void @v_shuffle_v2p3_v8p3__13_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__13_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v13 -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__13_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v13 +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -7274,21 +7274,21 @@ define void @v_shuffle_v2p3_v8p3__14_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__14_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v15, v7 -; GFX940-NEXT: global_store_dwordx2 v16, v[14:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__14_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v15, v7 +; GFX942-NEXT: global_store_dwordx2 v16, v[14:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -7330,16 +7330,16 @@ define void @v_shuffle_v2p3_v8p3__0_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__0_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__0_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7370,17 +7370,17 @@ define void @v_shuffle_v2p3_v8p3__1_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__1_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__1_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7410,16 +7410,16 @@ define void @v_shuffle_v2p3_v8p3__2_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__2_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__2_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7450,17 +7450,17 @@ define void @v_shuffle_v2p3_v8p3__3_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__3_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__3_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7490,16 +7490,16 @@ define void @v_shuffle_v2p3_v8p3__4_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__4_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__4_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7530,17 +7530,17 @@ define void @v_shuffle_v2p3_v8p3__5_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__5_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__5_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7570,16 +7570,16 @@ define void @v_shuffle_v2p3_v8p3__6_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__6_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__6_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7611,17 +7611,17 @@ define void @v_shuffle_v2p3_v8p3__7_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__7_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__7_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> store <2 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7664,17 +7664,17 @@ define void @v_shuffle_v2p3_v8p3__9_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__9_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__9_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -7707,17 +7707,17 @@ define void @v_shuffle_v2p3_v8p3__10_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__10_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__10_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -7751,18 +7751,18 @@ define void @v_shuffle_v2p3_v8p3__11_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__11_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__11_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -7795,17 +7795,17 @@ define void @v_shuffle_v2p3_v8p3__12_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__12_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__12_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -7839,18 +7839,18 @@ define void @v_shuffle_v2p3_v8p3__13_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__13_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__13_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -7883,17 +7883,17 @@ define void @v_shuffle_v2p3_v8p3__14_8(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__14_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v0 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__14_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v0 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -7924,16 +7924,16 @@ define void @v_shuffle_v2p3_v8p3__u_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__u_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__u_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -7972,21 +7972,21 @@ define void @v_shuffle_v2p3_v8p3__0_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__0_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__0_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -8025,21 +8025,21 @@ define void @v_shuffle_v2p3_v8p3__1_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__1_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__1_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -8078,21 +8078,21 @@ define void @v_shuffle_v2p3_v8p3__2_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__2_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__2_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -8131,21 +8131,21 @@ define void @v_shuffle_v2p3_v8p3__3_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__3_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__3_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -8184,21 +8184,21 @@ define void @v_shuffle_v2p3_v8p3__4_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__4_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__4_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -8237,21 +8237,21 @@ define void @v_shuffle_v2p3_v8p3__5_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__5_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v5 -; GFX940-NEXT: global_store_dwordx2 v14, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__5_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v5 +; GFX942-NEXT: global_store_dwordx2 v14, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -8290,21 +8290,21 @@ define void @v_shuffle_v2p3_v8p3__6_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__6_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__6_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -8343,21 +8343,21 @@ define void @v_shuffle_v2p3_v8p3__7_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__7_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v7 -; GFX940-NEXT: global_store_dwordx2 v16, v[8:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__7_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v7 +; GFX942-NEXT: global_store_dwordx2 v16, v[8:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -8388,16 +8388,16 @@ define void @v_shuffle_v2p3_v8p3__8_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__8_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__8_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -8430,17 +8430,17 @@ define void @v_shuffle_v2p3_v8p3__9_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__9_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__9_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -8473,17 +8473,17 @@ define void @v_shuffle_v2p3_v8p3__10_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__10_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__10_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -8516,17 +8516,17 @@ define void @v_shuffle_v2p3_v8p3__11_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__11_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__11_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -8559,17 +8559,17 @@ define void @v_shuffle_v2p3_v8p3__12_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__12_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__12_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -8602,17 +8602,17 @@ define void @v_shuffle_v2p3_v8p3__13_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__13_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__13_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -8645,17 +8645,17 @@ define void @v_shuffle_v2p3_v8p3__14_9(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__14_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__14_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -8687,17 +8687,17 @@ define void @v_shuffle_v2p3_v8p3__u_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__u_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__u_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -8736,21 +8736,21 @@ define void @v_shuffle_v2p3_v8p3__0_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__0_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__0_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -8789,21 +8789,21 @@ define void @v_shuffle_v2p3_v8p3__1_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__1_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__1_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -8842,21 +8842,21 @@ define void @v_shuffle_v2p3_v8p3__2_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__2_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v6 -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__2_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v6 +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -8895,21 +8895,21 @@ define void @v_shuffle_v2p3_v8p3__3_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__3_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[6:7] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__3_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[6:7] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -8948,21 +8948,21 @@ define void @v_shuffle_v2p3_v8p3__4_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__4_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v8 -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__4_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v8 +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -9001,21 +9001,21 @@ define void @v_shuffle_v2p3_v8p3__5_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__5_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[8:9] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__5_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[8:9] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -9054,21 +9054,21 @@ define void @v_shuffle_v2p3_v8p3__6_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__6_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v10 -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__6_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v10 +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -9107,21 +9107,21 @@ define void @v_shuffle_v2p3_v8p3__7_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__7_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[10:11] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__7_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[10:11] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -9154,17 +9154,17 @@ define void @v_shuffle_v2p3_v8p3__8_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__8_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__8_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -9197,18 +9197,18 @@ define void @v_shuffle_v2p3_v8p3__9_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__9_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__9_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -9241,17 +9241,17 @@ define void @v_shuffle_v2p3_v8p3__10_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__10_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__10_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -9284,17 +9284,17 @@ define void @v_shuffle_v2p3_v8p3__11_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__11_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__11_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -9327,17 +9327,17 @@ define void @v_shuffle_v2p3_v8p3__12_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__12_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__12_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -9371,18 +9371,18 @@ define void @v_shuffle_v2p3_v8p3__13_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__13_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__13_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -9415,17 +9415,17 @@ define void @v_shuffle_v2p3_v8p3__14_10(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__14_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v2 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__14_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v2 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -9456,16 +9456,16 @@ define void @v_shuffle_v2p3_v8p3__u_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__u_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__u_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -9504,21 +9504,21 @@ define void @v_shuffle_v2p3_v8p3__0_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__0_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__0_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -9557,21 +9557,21 @@ define void @v_shuffle_v2p3_v8p3__1_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__1_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__1_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -9610,21 +9610,21 @@ define void @v_shuffle_v2p3_v8p3__2_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__2_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__2_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -9663,21 +9663,21 @@ define void @v_shuffle_v2p3_v8p3__3_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__3_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__3_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -9716,21 +9716,21 @@ define void @v_shuffle_v2p3_v8p3__4_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__4_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v9 -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__4_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v9 +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -9769,21 +9769,21 @@ define void @v_shuffle_v2p3_v8p3__5_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__5_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v5 -; GFX940-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__5_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v5 +; GFX942-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -9822,21 +9822,21 @@ define void @v_shuffle_v2p3_v8p3__6_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__6_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v11 -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__6_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v11 +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -9875,21 +9875,21 @@ define void @v_shuffle_v2p3_v8p3__7_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__7_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v7 -; GFX940-NEXT: global_store_dwordx2 v16, v[10:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__7_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v7 +; GFX942-NEXT: global_store_dwordx2 v16, v[10:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -9922,17 +9922,17 @@ define void @v_shuffle_v2p3_v8p3__8_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__8_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__8_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -9965,17 +9965,17 @@ define void @v_shuffle_v2p3_v8p3__9_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__9_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__9_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -10006,16 +10006,16 @@ define void @v_shuffle_v2p3_v8p3__10_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__10_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__10_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -10048,17 +10048,17 @@ define void @v_shuffle_v2p3_v8p3__11_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__11_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__11_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -10091,17 +10091,17 @@ define void @v_shuffle_v2p3_v8p3__12_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__12_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__12_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -10134,17 +10134,17 @@ define void @v_shuffle_v2p3_v8p3__13_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__13_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__13_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -10177,17 +10177,17 @@ define void @v_shuffle_v2p3_v8p3__14_11(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__14_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__14_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -10219,17 +10219,17 @@ define void @v_shuffle_v2p3_v8p3__u_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__u_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__u_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -10268,21 +10268,21 @@ define void @v_shuffle_v2p3_v8p3__0_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__0_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__0_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -10321,21 +10321,21 @@ define void @v_shuffle_v2p3_v8p3__1_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__1_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[6:7] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__1_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[6:7] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -10374,21 +10374,21 @@ define void @v_shuffle_v2p3_v8p3__2_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__2_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v8 -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__2_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v8 +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -10427,21 +10427,21 @@ define void @v_shuffle_v2p3_v8p3__3_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__3_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[8:9] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__3_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[8:9] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -10480,21 +10480,21 @@ define void @v_shuffle_v2p3_v8p3__4_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__4_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v10 -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__4_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v10 +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -10533,21 +10533,21 @@ define void @v_shuffle_v2p3_v8p3__5_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__5_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[10:11] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__5_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[10:11] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -10586,21 +10586,21 @@ define void @v_shuffle_v2p3_v8p3__6_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__6_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v12 -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__6_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v12 +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -10639,21 +10639,21 @@ define void @v_shuffle_v2p3_v8p3__7_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__7_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[12:13] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__7_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[12:13] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -10686,17 +10686,17 @@ define void @v_shuffle_v2p3_v8p3__8_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__8_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__8_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -10730,18 +10730,18 @@ define void @v_shuffle_v2p3_v8p3__9_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__9_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__9_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -10774,17 +10774,17 @@ define void @v_shuffle_v2p3_v8p3__10_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__10_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__10_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -10817,18 +10817,18 @@ define void @v_shuffle_v2p3_v8p3__11_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__11_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__11_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -10861,17 +10861,17 @@ define void @v_shuffle_v2p3_v8p3__12_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__12_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__12_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -10904,17 +10904,17 @@ define void @v_shuffle_v2p3_v8p3__13_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__13_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__13_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -10947,17 +10947,17 @@ define void @v_shuffle_v2p3_v8p3__14_12(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__14_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v4 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__14_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v4 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -10988,16 +10988,16 @@ define void @v_shuffle_v2p3_v8p3__u_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__u_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__u_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -11036,21 +11036,21 @@ define void @v_shuffle_v2p3_v8p3__0_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__0_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__0_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -11089,21 +11089,21 @@ define void @v_shuffle_v2p3_v8p3__1_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__1_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v1 -; GFX940-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__1_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v1 +; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -11142,21 +11142,21 @@ define void @v_shuffle_v2p3_v8p3__2_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__2_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__2_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -11195,21 +11195,21 @@ define void @v_shuffle_v2p3_v8p3__3_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__3_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v3 -; GFX940-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__3_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v3 +; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -11248,21 +11248,21 @@ define void @v_shuffle_v2p3_v8p3__4_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__4_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v11 -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__4_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v11 +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -11301,21 +11301,21 @@ define void @v_shuffle_v2p3_v8p3__5_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__5_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v5 -; GFX940-NEXT: global_store_dwordx2 v14, v[10:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__5_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v5 +; GFX942-NEXT: global_store_dwordx2 v14, v[10:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -11354,21 +11354,21 @@ define void @v_shuffle_v2p3_v8p3__6_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__6_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v13 -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__6_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v13 +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -11407,21 +11407,21 @@ define void @v_shuffle_v2p3_v8p3__7_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__7_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v7 -; GFX940-NEXT: global_store_dwordx2 v16, v[12:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__7_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v7 +; GFX942-NEXT: global_store_dwordx2 v16, v[12:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -11454,17 +11454,17 @@ define void @v_shuffle_v2p3_v8p3__8_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__8_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__8_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -11497,17 +11497,17 @@ define void @v_shuffle_v2p3_v8p3__9_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__9_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__9_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -11540,17 +11540,17 @@ define void @v_shuffle_v2p3_v8p3__10_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__10_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__10_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -11583,17 +11583,17 @@ define void @v_shuffle_v2p3_v8p3__11_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__11_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__11_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -11624,16 +11624,16 @@ define void @v_shuffle_v2p3_v8p3__12_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__12_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__12_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -11666,17 +11666,17 @@ define void @v_shuffle_v2p3_v8p3__13_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__13_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__13_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -11709,17 +11709,17 @@ define void @v_shuffle_v2p3_v8p3__14_13(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__14_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__14_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -11751,17 +11751,17 @@ define void @v_shuffle_v2p3_v8p3__u_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__u_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__u_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -11800,21 +11800,21 @@ define void @v_shuffle_v2p3_v8p3__0_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__0_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v8 -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__0_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v8 +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -11853,21 +11853,21 @@ define void @v_shuffle_v2p3_v8p3__1_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__1_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[8:9] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__1_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[8:9] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -11906,21 +11906,21 @@ define void @v_shuffle_v2p3_v8p3__2_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__2_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v10 -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__2_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v10 +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -11959,21 +11959,21 @@ define void @v_shuffle_v2p3_v8p3__3_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__3_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[10:11] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__3_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[10:11] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -12012,21 +12012,21 @@ define void @v_shuffle_v2p3_v8p3__4_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__4_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v12 -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__4_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v12 +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -12065,21 +12065,21 @@ define void @v_shuffle_v2p3_v8p3__5_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__5_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[12:13] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__5_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[12:13] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -12118,21 +12118,21 @@ define void @v_shuffle_v2p3_v8p3__6_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__6_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v14 -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__6_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v14 +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -12171,21 +12171,21 @@ define void @v_shuffle_v2p3_v8p3__7_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__7_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[14:15] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__7_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[14:15] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -12218,17 +12218,17 @@ define void @v_shuffle_v2p3_v8p3__8_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__8_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__8_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -12262,18 +12262,18 @@ define void @v_shuffle_v2p3_v8p3__9_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__9_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__9_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -12306,17 +12306,17 @@ define void @v_shuffle_v2p3_v8p3__10_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__10_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__10_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -12350,18 +12350,18 @@ define void @v_shuffle_v2p3_v8p3__11_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__11_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__11_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -12394,17 +12394,17 @@ define void @v_shuffle_v2p3_v8p3__12_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__12_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__12_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -12437,18 +12437,18 @@ define void @v_shuffle_v2p3_v8p3__13_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__13_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__13_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -12481,17 +12481,17 @@ define void @v_shuffle_v2p3_v8p3__14_14(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__14_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v6 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__14_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v6 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -12522,16 +12522,16 @@ define void @v_shuffle_v2p3_v8p3__u_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__u_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__u_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -12570,21 +12570,21 @@ define void @v_shuffle_v2p3_v8p3__0_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__0_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__0_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -12623,21 +12623,21 @@ define void @v_shuffle_v2p3_v8p3__1_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__1_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v1 -; GFX940-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__1_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v1 +; GFX942-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -12676,21 +12676,21 @@ define void @v_shuffle_v2p3_v8p3__2_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__2_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__2_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -12729,21 +12729,21 @@ define void @v_shuffle_v2p3_v8p3__3_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__3_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v3 -; GFX940-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__3_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v3 +; GFX942-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -12782,21 +12782,21 @@ define void @v_shuffle_v2p3_v8p3__4_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__4_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v13 -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__4_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v13 +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -12835,21 +12835,21 @@ define void @v_shuffle_v2p3_v8p3__5_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__5_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v5 -; GFX940-NEXT: global_store_dwordx2 v14, v[12:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__5_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v5 +; GFX942-NEXT: global_store_dwordx2 v14, v[12:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -12888,21 +12888,21 @@ define void @v_shuffle_v2p3_v8p3__6_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__6_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v7, v15 -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__6_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v7, v15 +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -12941,21 +12941,21 @@ define void @v_shuffle_v2p3_v8p3__7_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__7_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v14, v7 -; GFX940-NEXT: global_store_dwordx2 v16, v[14:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__7_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v14, v7 +; GFX942-NEXT: global_store_dwordx2 v16, v[14:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -12988,17 +12988,17 @@ define void @v_shuffle_v2p3_v8p3__8_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__8_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__8_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -13031,17 +13031,17 @@ define void @v_shuffle_v2p3_v8p3__9_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__9_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__9_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -13074,17 +13074,17 @@ define void @v_shuffle_v2p3_v8p3__10_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__10_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__10_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -13117,17 +13117,17 @@ define void @v_shuffle_v2p3_v8p3__11_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__11_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__11_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -13160,17 +13160,17 @@ define void @v_shuffle_v2p3_v8p3__12_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__12_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__12_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -13203,17 +13203,17 @@ define void @v_shuffle_v2p3_v8p3__13_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__13_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__13_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -13244,16 +13244,16 @@ define void @v_shuffle_v2p3_v8p3__14_15(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v2p3_v8p3__14_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v2p3_v8p3__14_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -13298,17 +13298,17 @@ define void @s_shuffle_v2p3_v8p3__0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -13340,17 +13340,17 @@ define void @s_shuffle_v2p3_v8p3__1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -13382,17 +13382,17 @@ define void @s_shuffle_v2p3_v8p3__2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -13424,17 +13424,17 @@ define void @s_shuffle_v2p3_v8p3__3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -13464,17 +13464,17 @@ define void @s_shuffle_v2p3_v8p3__4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -13506,17 +13506,17 @@ define void @s_shuffle_v2p3_v8p3__5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -13548,17 +13548,17 @@ define void @s_shuffle_v2p3_v8p3__6_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -13590,17 +13590,17 @@ define void @s_shuffle_v2p3_v8p3__7_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -13646,17 +13646,17 @@ define void @s_shuffle_v2p3_v8p3__9_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__9_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__9_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -13689,17 +13689,17 @@ define void @s_shuffle_v2p3_v8p3__10_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__10_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__10_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -13732,17 +13732,17 @@ define void @s_shuffle_v2p3_v8p3__11_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__11_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__11_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -13773,17 +13773,17 @@ define void @s_shuffle_v2p3_v8p3__12_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__12_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__12_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -13816,17 +13816,17 @@ define void @s_shuffle_v2p3_v8p3__13_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__13_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__13_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -13859,17 +13859,17 @@ define void @s_shuffle_v2p3_v8p3__14_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__14_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__14_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -13902,17 +13902,17 @@ define void @s_shuffle_v2p3_v8p3__15_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__15_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__15_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -13953,22 +13953,22 @@ define void @s_shuffle_v2p3_v8p3__15_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__15_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__15_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -14007,20 +14007,20 @@ define void @s_shuffle_v2p3_v8p3__15_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__15_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__15_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -14061,22 +14061,22 @@ define void @s_shuffle_v2p3_v8p3__15_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__15_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__15_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -14117,22 +14117,22 @@ define void @s_shuffle_v2p3_v8p3__15_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__15_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s11 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__15_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -14173,21 +14173,21 @@ define void @s_shuffle_v2p3_v8p3__15_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__15_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s15 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__15_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s15 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -14226,21 +14226,21 @@ define void @s_shuffle_v2p3_v8p3__15_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__15_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__15_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -14281,21 +14281,21 @@ define void @s_shuffle_v2p3_v8p3__15_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__15_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s15 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__15_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s15 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -14336,21 +14336,21 @@ define void @s_shuffle_v2p3_v8p3__15_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__15_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s15 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__15_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -14385,18 +14385,18 @@ define void @s_shuffle_v2p3_v8p3__15_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__15_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__15_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -14450,18 +14450,18 @@ define void @s_shuffle_v2p3_v8p3__15_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__15_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__15_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -14496,18 +14496,18 @@ define void @s_shuffle_v2p3_v8p3__15_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__15_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__15_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -14542,18 +14542,18 @@ define void @s_shuffle_v2p3_v8p3__15_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__15_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__15_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -14607,18 +14607,18 @@ define void @s_shuffle_v2p3_v8p3__15_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__15_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__15_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -14653,18 +14653,18 @@ define void @s_shuffle_v2p3_v8p3__15_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__15_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__15_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -14697,17 +14697,17 @@ define void @s_shuffle_v2p3_v8p3__u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -14759,18 +14759,18 @@ define void @s_shuffle_v2p3_v8p3__1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -14804,18 +14804,18 @@ define void @s_shuffle_v2p3_v8p3__2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s0 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s0 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -14849,18 +14849,18 @@ define void @s_shuffle_v2p3_v8p3__3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -14912,18 +14912,18 @@ define void @s_shuffle_v2p3_v8p3__5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -14957,18 +14957,18 @@ define void @s_shuffle_v2p3_v8p3__6_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s0 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s0 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -15002,18 +15002,18 @@ define void @s_shuffle_v2p3_v8p3__7_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -15045,17 +15045,17 @@ define void @s_shuffle_v2p3_v8p3__8_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__8_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__8_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -15095,22 +15095,22 @@ define void @s_shuffle_v2p3_v8p3__9_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__9_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__9_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -15151,22 +15151,22 @@ define void @s_shuffle_v2p3_v8p3__10_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__10_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s0 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__10_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s0 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -15207,22 +15207,22 @@ define void @s_shuffle_v2p3_v8p3__11_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__11_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__11_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -15261,21 +15261,21 @@ define void @s_shuffle_v2p3_v8p3__12_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__12_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__12_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -15316,22 +15316,22 @@ define void @s_shuffle_v2p3_v8p3__13_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__13_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s9 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__13_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s9 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -15372,22 +15372,22 @@ define void @s_shuffle_v2p3_v8p3__14_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__14_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: s_mov_b64 s[8:9], s[10:11] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__14_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -15418,17 +15418,17 @@ define void @s_shuffle_v2p3_v8p3__u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -15458,17 +15458,17 @@ define void @s_shuffle_v2p3_v8p3__0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -15520,18 +15520,18 @@ define void @s_shuffle_v2p3_v8p3__2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -15619,18 +15619,18 @@ define void @s_shuffle_v2p3_v8p3__6_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -15678,17 +15678,17 @@ define void @s_shuffle_v2p3_v8p3__8_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__8_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__8_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -15726,20 +15726,20 @@ define void @s_shuffle_v2p3_v8p3__9_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__9_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__9_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -15780,22 +15780,22 @@ define void @s_shuffle_v2p3_v8p3__10_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__10_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__10_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -15834,20 +15834,20 @@ define void @s_shuffle_v2p3_v8p3__11_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__11_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__11_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -15886,21 +15886,21 @@ define void @s_shuffle_v2p3_v8p3__12_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__12_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__12_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -15939,20 +15939,20 @@ define void @s_shuffle_v2p3_v8p3__13_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__13_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__13_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -15993,22 +15993,22 @@ define void @s_shuffle_v2p3_v8p3__14_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__14_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[10:11] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__14_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -16041,17 +16041,17 @@ define void @s_shuffle_v2p3_v8p3__u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -16103,18 +16103,18 @@ define void @s_shuffle_v2p3_v8p3__1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -16148,18 +16148,18 @@ define void @s_shuffle_v2p3_v8p3__2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s2 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s2 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -16193,18 +16193,18 @@ define void @s_shuffle_v2p3_v8p3__3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -16256,18 +16256,18 @@ define void @s_shuffle_v2p3_v8p3__5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -16301,18 +16301,18 @@ define void @s_shuffle_v2p3_v8p3__6_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s2 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s2 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -16346,18 +16346,18 @@ define void @s_shuffle_v2p3_v8p3__7_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -16389,17 +16389,17 @@ define void @s_shuffle_v2p3_v8p3__8_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__8_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__8_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -16439,22 +16439,22 @@ define void @s_shuffle_v2p3_v8p3__9_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__9_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__9_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -16495,22 +16495,22 @@ define void @s_shuffle_v2p3_v8p3__10_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__10_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s2 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__10_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s2 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -16551,22 +16551,22 @@ define void @s_shuffle_v2p3_v8p3__11_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__11_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__11_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -16605,21 +16605,21 @@ define void @s_shuffle_v2p3_v8p3__12_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__12_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__12_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -16660,22 +16660,22 @@ define void @s_shuffle_v2p3_v8p3__13_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__13_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s9 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__13_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s9 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -16716,22 +16716,22 @@ define void @s_shuffle_v2p3_v8p3__14_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__14_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: s_mov_b64 s[8:9], s[10:11] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__14_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -16764,17 +16764,17 @@ define void @s_shuffle_v2p3_v8p3__u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -16826,18 +16826,18 @@ define void @s_shuffle_v2p3_v8p3__1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -16869,17 +16869,17 @@ define void @s_shuffle_v2p3_v8p3__2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -16913,18 +16913,18 @@ define void @s_shuffle_v2p3_v8p3__3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -16976,18 +16976,18 @@ define void @s_shuffle_v2p3_v8p3__5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -17021,18 +17021,18 @@ define void @s_shuffle_v2p3_v8p3__6_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -17066,18 +17066,18 @@ define void @s_shuffle_v2p3_v8p3__7_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -17109,17 +17109,17 @@ define void @s_shuffle_v2p3_v8p3__8_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__8_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__8_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -17159,22 +17159,22 @@ define void @s_shuffle_v2p3_v8p3__9_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__9_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__9_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -17215,22 +17215,22 @@ define void @s_shuffle_v2p3_v8p3__10_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__10_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__10_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -17271,22 +17271,22 @@ define void @s_shuffle_v2p3_v8p3__11_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__11_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__11_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -17325,21 +17325,21 @@ define void @s_shuffle_v2p3_v8p3__12_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__12_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__12_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -17380,22 +17380,22 @@ define void @s_shuffle_v2p3_v8p3__13_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__13_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s9 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__13_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s9 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -17436,22 +17436,22 @@ define void @s_shuffle_v2p3_v8p3__14_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__14_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[10:11] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__14_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -17484,17 +17484,17 @@ define void @s_shuffle_v2p3_v8p3__u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -17546,18 +17546,18 @@ define void @s_shuffle_v2p3_v8p3__1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -17591,18 +17591,18 @@ define void @s_shuffle_v2p3_v8p3__2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s4 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s4 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -17636,18 +17636,18 @@ define void @s_shuffle_v2p3_v8p3__3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -17699,18 +17699,18 @@ define void @s_shuffle_v2p3_v8p3__5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -17744,18 +17744,18 @@ define void @s_shuffle_v2p3_v8p3__6_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s4 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s4 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -17789,18 +17789,18 @@ define void @s_shuffle_v2p3_v8p3__7_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -17832,17 +17832,17 @@ define void @s_shuffle_v2p3_v8p3__8_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__8_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__8_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -17882,21 +17882,21 @@ define void @s_shuffle_v2p3_v8p3__9_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__9_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s9 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__9_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s9 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -17937,21 +17937,21 @@ define void @s_shuffle_v2p3_v8p3__10_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__10_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s4 -; GFX940-NEXT: s_mov_b64 s[8:9], s[10:11] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__10_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s4 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -17992,21 +17992,21 @@ define void @s_shuffle_v2p3_v8p3__11_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__11_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__11_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -18045,21 +18045,21 @@ define void @s_shuffle_v2p3_v8p3__12_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__12_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s12 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__12_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s12 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -18100,21 +18100,21 @@ define void @s_shuffle_v2p3_v8p3__13_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__13_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s13 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__13_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s13 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -18155,21 +18155,21 @@ define void @s_shuffle_v2p3_v8p3__14_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__14_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s15, s4 -; GFX940-NEXT: s_mov_b64 s[8:9], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__14_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s15, s4 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -18200,17 +18200,17 @@ define void @s_shuffle_v2p3_v8p3__u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -18280,18 +18280,18 @@ define void @s_shuffle_v2p3_v8p3__2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -18339,17 +18339,17 @@ define void @s_shuffle_v2p3_v8p3__4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -18401,18 +18401,18 @@ define void @s_shuffle_v2p3_v8p3__6_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -18460,17 +18460,17 @@ define void @s_shuffle_v2p3_v8p3__8_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__8_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__8_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -18508,21 +18508,21 @@ define void @s_shuffle_v2p3_v8p3__9_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__9_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__9_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -18563,21 +18563,21 @@ define void @s_shuffle_v2p3_v8p3__10_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__10_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[10:11] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__10_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -18616,21 +18616,21 @@ define void @s_shuffle_v2p3_v8p3__11_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__11_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__11_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -18669,21 +18669,21 @@ define void @s_shuffle_v2p3_v8p3__12_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__12_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__12_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -18722,21 +18722,21 @@ define void @s_shuffle_v2p3_v8p3__13_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__13_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__13_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -18777,21 +18777,21 @@ define void @s_shuffle_v2p3_v8p3__14_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__14_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__14_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -18824,17 +18824,17 @@ define void @s_shuffle_v2p3_v8p3__u_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -18886,18 +18886,18 @@ define void @s_shuffle_v2p3_v8p3__1_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -18931,18 +18931,18 @@ define void @s_shuffle_v2p3_v8p3__2_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s6 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s6 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -18976,18 +18976,18 @@ define void @s_shuffle_v2p3_v8p3__3_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -19039,18 +19039,18 @@ define void @s_shuffle_v2p3_v8p3__5_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -19084,18 +19084,18 @@ define void @s_shuffle_v2p3_v8p3__6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s6 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s6 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -19129,18 +19129,18 @@ define void @s_shuffle_v2p3_v8p3__7_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -19172,17 +19172,17 @@ define void @s_shuffle_v2p3_v8p3__8_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__8_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__8_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -19222,21 +19222,21 @@ define void @s_shuffle_v2p3_v8p3__9_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__9_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s9 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__9_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s9 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -19277,21 +19277,21 @@ define void @s_shuffle_v2p3_v8p3__10_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__10_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s6 -; GFX940-NEXT: s_mov_b64 s[8:9], s[10:11] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__10_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s6 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -19332,21 +19332,21 @@ define void @s_shuffle_v2p3_v8p3__11_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__11_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__11_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -19385,21 +19385,21 @@ define void @s_shuffle_v2p3_v8p3__12_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__12_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s14 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__12_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s14 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -19440,21 +19440,21 @@ define void @s_shuffle_v2p3_v8p3__13_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__13_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s13 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__13_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s13 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -19495,21 +19495,21 @@ define void @s_shuffle_v2p3_v8p3__14_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__14_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s15, s6 -; GFX940-NEXT: s_mov_b64 s[8:9], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__14_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s15, s6 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -19542,17 +19542,17 @@ define void @s_shuffle_v2p3_v8p3__u_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -19604,18 +19604,18 @@ define void @s_shuffle_v2p3_v8p3__1_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -19649,18 +19649,18 @@ define void @s_shuffle_v2p3_v8p3__2_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -19694,18 +19694,18 @@ define void @s_shuffle_v2p3_v8p3__3_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -19757,18 +19757,18 @@ define void @s_shuffle_v2p3_v8p3__5_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -19800,17 +19800,17 @@ define void @s_shuffle_v2p3_v8p3__6_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -19844,18 +19844,18 @@ define void @s_shuffle_v2p3_v8p3__7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -19887,17 +19887,17 @@ define void @s_shuffle_v2p3_v8p3__8_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__8_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__8_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -19937,21 +19937,21 @@ define void @s_shuffle_v2p3_v8p3__9_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__9_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s9 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__9_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s9 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -19992,21 +19992,21 @@ define void @s_shuffle_v2p3_v8p3__10_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__10_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[10:11] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__10_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -20047,21 +20047,21 @@ define void @s_shuffle_v2p3_v8p3__11_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__11_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s11 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__11_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -20100,21 +20100,21 @@ define void @s_shuffle_v2p3_v8p3__12_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__12_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__12_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -20155,21 +20155,21 @@ define void @s_shuffle_v2p3_v8p3__13_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__13_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s13 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__13_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -20210,21 +20210,21 @@ define void @s_shuffle_v2p3_v8p3__14_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__14_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s15, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__14_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -20269,17 +20269,17 @@ define void @s_shuffle_v2p3_v8p3__0_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__0_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__0_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -20311,17 +20311,17 @@ define void @s_shuffle_v2p3_v8p3__1_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__1_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__1_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -20353,17 +20353,17 @@ define void @s_shuffle_v2p3_v8p3__2_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__2_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__2_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -20395,17 +20395,17 @@ define void @s_shuffle_v2p3_v8p3__3_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__3_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__3_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -20435,17 +20435,17 @@ define void @s_shuffle_v2p3_v8p3__4_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__4_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__4_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -20477,17 +20477,17 @@ define void @s_shuffle_v2p3_v8p3__5_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__5_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__5_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -20519,17 +20519,17 @@ define void @s_shuffle_v2p3_v8p3__6_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__6_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__6_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -20561,17 +20561,17 @@ define void @s_shuffle_v2p3_v8p3__7_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__7_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__7_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<2 x ptr addrspace(3)> %shuf) @@ -20619,18 +20619,18 @@ define void @s_shuffle_v2p3_v8p3__9_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__9_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__9_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -20665,18 +20665,18 @@ define void @s_shuffle_v2p3_v8p3__10_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__10_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s0 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__10_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s0 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -20711,18 +20711,18 @@ define void @s_shuffle_v2p3_v8p3__11_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__11_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__11_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -20776,18 +20776,18 @@ define void @s_shuffle_v2p3_v8p3__13_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__13_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__13_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -20822,18 +20822,18 @@ define void @s_shuffle_v2p3_v8p3__14_8() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__14_8: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s0 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__14_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s0 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -20864,17 +20864,17 @@ define void @s_shuffle_v2p3_v8p3__u_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__u_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__u_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -20913,20 +20913,20 @@ define void @s_shuffle_v2p3_v8p3__0_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__0_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__0_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -20965,20 +20965,20 @@ define void @s_shuffle_v2p3_v8p3__1_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__1_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__1_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -21019,22 +21019,22 @@ define void @s_shuffle_v2p3_v8p3__2_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__2_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__2_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -21073,20 +21073,20 @@ define void @s_shuffle_v2p3_v8p3__3_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__3_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__3_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -21125,21 +21125,21 @@ define void @s_shuffle_v2p3_v8p3__4_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__4_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__4_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -21178,20 +21178,20 @@ define void @s_shuffle_v2p3_v8p3__5_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__5_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__5_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -21232,21 +21232,21 @@ define void @s_shuffle_v2p3_v8p3__6_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__6_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s9 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__6_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s9 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -21285,20 +21285,20 @@ define void @s_shuffle_v2p3_v8p3__7_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__7_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__7_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -21329,17 +21329,17 @@ define void @s_shuffle_v2p3_v8p3__8_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__8_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__8_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -21393,18 +21393,18 @@ define void @s_shuffle_v2p3_v8p3__10_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__10_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__10_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -21496,18 +21496,18 @@ define void @s_shuffle_v2p3_v8p3__14_9() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__14_9: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__14_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -21540,17 +21540,17 @@ define void @s_shuffle_v2p3_v8p3__u_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__u_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__u_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -21589,20 +21589,20 @@ define void @s_shuffle_v2p3_v8p3__0_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__0_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__0_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -21643,22 +21643,22 @@ define void @s_shuffle_v2p3_v8p3__1_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__1_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__1_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -21699,22 +21699,22 @@ define void @s_shuffle_v2p3_v8p3__2_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__2_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s6 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__2_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s6 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -21755,22 +21755,22 @@ define void @s_shuffle_v2p3_v8p3__3_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__3_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__3_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -21809,21 +21809,21 @@ define void @s_shuffle_v2p3_v8p3__4_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__4_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__4_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -21864,21 +21864,21 @@ define void @s_shuffle_v2p3_v8p3__5_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__5_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__5_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -21919,21 +21919,21 @@ define void @s_shuffle_v2p3_v8p3__6_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__6_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s10 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__6_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s10 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -21974,21 +21974,21 @@ define void @s_shuffle_v2p3_v8p3__7_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__7_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__7_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -22042,18 +22042,18 @@ define void @s_shuffle_v2p3_v8p3__9_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__9_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__9_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -22088,18 +22088,18 @@ define void @s_shuffle_v2p3_v8p3__10_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__10_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s2 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__10_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s2 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -22134,18 +22134,18 @@ define void @s_shuffle_v2p3_v8p3__11_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__11_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__11_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -22199,18 +22199,18 @@ define void @s_shuffle_v2p3_v8p3__13_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__13_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__13_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -22245,18 +22245,18 @@ define void @s_shuffle_v2p3_v8p3__14_10() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__14_10: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s2 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__14_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s2 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -22289,17 +22289,17 @@ define void @s_shuffle_v2p3_v8p3__u_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__u_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__u_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -22338,20 +22338,20 @@ define void @s_shuffle_v2p3_v8p3__0_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__0_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__0_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -22392,22 +22392,22 @@ define void @s_shuffle_v2p3_v8p3__1_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__1_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__1_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -22448,22 +22448,22 @@ define void @s_shuffle_v2p3_v8p3__2_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__2_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__2_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -22504,22 +22504,22 @@ define void @s_shuffle_v2p3_v8p3__3_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__3_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__3_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -22558,21 +22558,21 @@ define void @s_shuffle_v2p3_v8p3__4_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__4_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__4_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -22613,21 +22613,21 @@ define void @s_shuffle_v2p3_v8p3__5_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__5_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[10:11] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__5_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -22668,21 +22668,21 @@ define void @s_shuffle_v2p3_v8p3__6_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__6_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s11 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__6_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -22723,21 +22723,21 @@ define void @s_shuffle_v2p3_v8p3__7_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__7_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[10:11] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__7_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -22791,18 +22791,18 @@ define void @s_shuffle_v2p3_v8p3__9_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__9_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__9_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -22835,17 +22835,17 @@ define void @s_shuffle_v2p3_v8p3__10_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__10_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__10_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -22880,18 +22880,18 @@ define void @s_shuffle_v2p3_v8p3__11_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__11_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__11_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -22945,18 +22945,18 @@ define void @s_shuffle_v2p3_v8p3__13_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__13_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__13_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -22991,18 +22991,18 @@ define void @s_shuffle_v2p3_v8p3__14_11() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__14_11: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__14_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -23035,17 +23035,17 @@ define void @s_shuffle_v2p3_v8p3__u_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__u_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__u_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -23084,20 +23084,20 @@ define void @s_shuffle_v2p3_v8p3__0_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__0_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__0_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -23138,21 +23138,21 @@ define void @s_shuffle_v2p3_v8p3__1_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__1_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s12 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__1_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s12 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -23193,22 +23193,22 @@ define void @s_shuffle_v2p3_v8p3__2_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__2_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s8 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__2_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s8 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -23249,21 +23249,21 @@ define void @s_shuffle_v2p3_v8p3__3_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__3_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s12 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__3_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s12 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -23302,21 +23302,21 @@ define void @s_shuffle_v2p3_v8p3__4_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__4_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__4_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -23357,21 +23357,21 @@ define void @s_shuffle_v2p3_v8p3__5_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__5_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s12 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__5_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s12 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -23412,21 +23412,21 @@ define void @s_shuffle_v2p3_v8p3__6_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__6_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s12 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__6_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s12 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -23467,21 +23467,21 @@ define void @s_shuffle_v2p3_v8p3__7_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__7_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s12 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__7_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s12 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -23535,18 +23535,18 @@ define void @s_shuffle_v2p3_v8p3__9_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__9_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__9_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -23581,18 +23581,18 @@ define void @s_shuffle_v2p3_v8p3__10_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__10_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s4 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__10_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s4 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -23627,18 +23627,18 @@ define void @s_shuffle_v2p3_v8p3__11_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__11_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__11_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -23692,18 +23692,18 @@ define void @s_shuffle_v2p3_v8p3__13_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__13_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__13_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -23738,18 +23738,18 @@ define void @s_shuffle_v2p3_v8p3__14_12() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__14_12: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s4 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__14_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s4 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -23780,17 +23780,17 @@ define void @s_shuffle_v2p3_v8p3__u_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__u_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__u_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -23829,20 +23829,20 @@ define void @s_shuffle_v2p3_v8p3__0_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__0_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__0_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -23881,21 +23881,21 @@ define void @s_shuffle_v2p3_v8p3__1_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__1_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__1_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -23936,22 +23936,22 @@ define void @s_shuffle_v2p3_v8p3__2_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__2_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s9 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__2_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s9 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -23990,21 +23990,21 @@ define void @s_shuffle_v2p3_v8p3__3_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__3_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__3_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -24043,21 +24043,21 @@ define void @s_shuffle_v2p3_v8p3__4_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__4_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__4_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -24096,21 +24096,21 @@ define void @s_shuffle_v2p3_v8p3__5_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__5_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__5_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -24151,21 +24151,21 @@ define void @s_shuffle_v2p3_v8p3__6_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__6_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s13 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__6_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -24204,21 +24204,21 @@ define void @s_shuffle_v2p3_v8p3__7_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__7_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__7_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -24291,18 +24291,18 @@ define void @s_shuffle_v2p3_v8p3__10_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__10_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__10_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -24352,17 +24352,17 @@ define void @s_shuffle_v2p3_v8p3__12_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__12_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__12_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -24416,18 +24416,18 @@ define void @s_shuffle_v2p3_v8p3__14_13() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__14_13: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__14_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -24460,17 +24460,17 @@ define void @s_shuffle_v2p3_v8p3__u_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__u_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__u_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -24509,20 +24509,20 @@ define void @s_shuffle_v2p3_v8p3__0_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__0_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__0_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -24563,22 +24563,22 @@ define void @s_shuffle_v2p3_v8p3__1_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__1_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__1_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -24619,22 +24619,22 @@ define void @s_shuffle_v2p3_v8p3__2_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__2_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s10 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__2_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s10 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -24675,22 +24675,22 @@ define void @s_shuffle_v2p3_v8p3__3_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__3_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__3_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -24729,21 +24729,21 @@ define void @s_shuffle_v2p3_v8p3__4_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__4_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__4_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -24784,21 +24784,21 @@ define void @s_shuffle_v2p3_v8p3__5_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__5_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s14 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__5_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s14 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -24839,21 +24839,21 @@ define void @s_shuffle_v2p3_v8p3__6_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__6_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s14 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__6_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s14 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -24894,21 +24894,21 @@ define void @s_shuffle_v2p3_v8p3__7_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__7_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s14 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__7_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s14 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -24962,18 +24962,18 @@ define void @s_shuffle_v2p3_v8p3__9_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__9_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__9_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -25008,18 +25008,18 @@ define void @s_shuffle_v2p3_v8p3__10_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__10_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s6 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__10_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s6 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -25054,18 +25054,18 @@ define void @s_shuffle_v2p3_v8p3__11_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__11_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__11_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -25119,18 +25119,18 @@ define void @s_shuffle_v2p3_v8p3__13_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__13_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__13_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -25165,18 +25165,18 @@ define void @s_shuffle_v2p3_v8p3__14_14() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__14_14: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s6 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__14_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s6 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -25209,17 +25209,17 @@ define void @s_shuffle_v2p3_v8p3__u_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__u_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__u_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -25258,20 +25258,20 @@ define void @s_shuffle_v2p3_v8p3__0_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__0_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__0_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -25312,22 +25312,22 @@ define void @s_shuffle_v2p3_v8p3__1_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__1_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[10:11] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__1_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -25368,22 +25368,22 @@ define void @s_shuffle_v2p3_v8p3__2_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__2_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s11 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__2_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -25424,22 +25424,22 @@ define void @s_shuffle_v2p3_v8p3__3_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__3_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[10:11] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__3_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -25478,21 +25478,21 @@ define void @s_shuffle_v2p3_v8p3__4_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__4_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__4_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -25533,21 +25533,21 @@ define void @s_shuffle_v2p3_v8p3__5_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__5_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__5_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -25588,21 +25588,21 @@ define void @s_shuffle_v2p3_v8p3__6_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__6_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s7, s15 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__6_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s7, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -25643,21 +25643,21 @@ define void @s_shuffle_v2p3_v8p3__7_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__7_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[14:15] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__7_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -25711,18 +25711,18 @@ define void @s_shuffle_v2p3_v8p3__9_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__9_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s1 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__9_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -25757,18 +25757,18 @@ define void @s_shuffle_v2p3_v8p3__10_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__10_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s3, s7 -; GFX940-NEXT: s_mov_b64 s[8:9], s[2:3] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__10_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s3, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -25803,18 +25803,18 @@ define void @s_shuffle_v2p3_v8p3__11_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__11_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s3 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__11_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -25868,18 +25868,18 @@ define void @s_shuffle_v2p3_v8p3__13_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__13_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s6, s5 -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__13_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s6, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> @@ -25912,17 +25912,17 @@ define void @s_shuffle_v2p3_v8p3__14_15() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v2p3_v8p3__14_15: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v2p3_v8p3__14_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <8 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <8 x ptr addrspace(3)> %vec0, <8 x ptr addrspace(3)> %vec1, <2 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3bf16.v2bf16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3bf16.v2bf16.ll index d52dee8ed2cb8..008e19b620520 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3bf16.v2bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3bf16.v2bf16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v3bf16_v2bf16__u_u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v3bf16_v2bf16__0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -79,17 +79,17 @@ define void @v_shuffle_v3bf16_v2bf16__1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v1, s0, v1, 16 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -132,17 +132,17 @@ define void @v_shuffle_v3bf16_v2bf16__3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v1, s0, v1, 16 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -181,21 +181,21 @@ define void @v_shuffle_v3bf16_v2bf16__3_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v2, 16 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__3_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v2, 16 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -236,22 +236,22 @@ define void @v_shuffle_v3bf16_v2bf16__3_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v2, s2 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__3_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v2, s2 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -284,17 +284,17 @@ define void @v_shuffle_v3bf16_v2bf16__3_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__3_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v1, 16 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -329,18 +329,18 @@ define void @v_shuffle_v3bf16_v2bf16__3_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__3_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -383,22 +383,22 @@ define void @v_shuffle_v3bf16_v2bf16__3_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v2, v2, v2, s2 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__3_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v2, v2, v2, s2 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -441,22 +441,22 @@ define void @v_shuffle_v3bf16_v2bf16__3_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v2, v2, v2, s2 -; GFX940-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__3_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v2, v2, v2, s2 +; GFX942-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -493,19 +493,19 @@ define void @v_shuffle_v3bf16_v2bf16__3_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__3_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -544,20 +544,20 @@ define void @v_shuffle_v3bf16_v2bf16__3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -592,19 +592,19 @@ define void @v_shuffle_v3bf16_v2bf16__u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -640,19 +640,19 @@ define void @v_shuffle_v3bf16_v2bf16__0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> zeroinitializer store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -686,19 +686,19 @@ define void @v_shuffle_v3bf16_v2bf16__1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v1, 16 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v1, 16 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -732,19 +732,19 @@ define void @v_shuffle_v3bf16_v2bf16__2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -784,22 +784,22 @@ define void @v_shuffle_v3bf16_v2bf16__3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v2, 16 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v2, 16 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -840,22 +840,22 @@ define void @v_shuffle_v3bf16_v2bf16__3_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v2, s0, v2, 16 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__3_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v2, s0, v2, 16 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -898,22 +898,22 @@ define void @v_shuffle_v3bf16_v2bf16__3_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v2, v1, v2, s2 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__3_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v2, v1, v2, s2 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -954,22 +954,22 @@ define void @v_shuffle_v3bf16_v2bf16__3_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v2, v2, v2, 16 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__3_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v2, v2, v2, 16 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -1004,18 +1004,18 @@ define void @v_shuffle_v3bf16_v2bf16__u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1049,18 +1049,18 @@ define void @v_shuffle_v3bf16_v2bf16__0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1098,20 +1098,20 @@ define void @v_shuffle_v3bf16_v2bf16__1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1145,18 +1145,18 @@ define void @v_shuffle_v3bf16_v2bf16__2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1200,23 +1200,23 @@ define void @v_shuffle_v3bf16_v2bf16__3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v2, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v2, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -1259,22 +1259,22 @@ define void @v_shuffle_v3bf16_v2bf16__3_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v2, s0, v2, 16 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__3_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v2, s0, v2, 16 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -1317,22 +1317,22 @@ define void @v_shuffle_v3bf16_v2bf16__3_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v2, 16 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__3_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v2, 16 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -1373,22 +1373,22 @@ define void @v_shuffle_v3bf16_v2bf16__3_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v2, v2, v2, 16 -; GFX940-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__3_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v2, v2, v2, 16 +; GFX942-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -1430,16 +1430,16 @@ define void @v_shuffle_v3bf16_v2bf16__0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1471,17 +1471,17 @@ define void @v_shuffle_v3bf16_v2bf16__1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v1, s0, v1, 16 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1526,19 +1526,19 @@ define void @v_shuffle_v3bf16_v2bf16__3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v1, 16 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v1, 16 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -1573,19 +1573,19 @@ define void @v_shuffle_v3bf16_v2bf16__3_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v2, s0, v1, 16 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__3_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v2, s0, v1, 16 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -1626,22 +1626,22 @@ define void @v_shuffle_v3bf16_v2bf16__3_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v2, 16 -; GFX940-NEXT: global_store_short v0, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__3_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v2, 16 +; GFX942-NEXT: global_store_short v0, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -1684,22 +1684,22 @@ define void @v_shuffle_v3bf16_v2bf16__3_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v1, v2, s2 -; GFX940-NEXT: global_store_short v0, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__3_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v1, v2, s2 +; GFX942-NEXT: global_store_short v0, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -1734,18 +1734,18 @@ define void @v_shuffle_v3bf16_v2bf16__u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -1790,23 +1790,23 @@ define void @v_shuffle_v3bf16_v2bf16__0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v2 -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: global_store_short v0, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v1, v2 +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: global_store_short v0, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -1851,24 +1851,24 @@ define void @v_shuffle_v3bf16_v2bf16__1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v1, s2 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v2 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v1, s2 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -1903,18 +1903,18 @@ define void @v_shuffle_v3bf16_v2bf16__2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -1951,19 +1951,19 @@ define void @v_shuffle_v3bf16_v2bf16__3_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v2, s0, v1, 16 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__3_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v2, s0, v1, 16 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -2006,22 +2006,22 @@ define void @v_shuffle_v3bf16_v2bf16__3_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v2, 16 -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: global_store_short v0, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__3_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v2, 16 +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: global_store_short v0, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -2066,24 +2066,24 @@ define void @v_shuffle_v3bf16_v2bf16__3_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v2, s2 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v2 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__3_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v2, s2 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -2120,19 +2120,19 @@ define void @v_shuffle_v3bf16_v2bf16__3_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v2bf16__3_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v1, 16 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__3_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v1, 16 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -2178,17 +2178,17 @@ define void @s_shuffle_v3bf16_v2bf16__0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v2bf16__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> @@ -2221,17 +2221,17 @@ define void @s_shuffle_v3bf16_v2bf16__1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v2bf16__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> @@ -2279,17 +2279,17 @@ define void @s_shuffle_v3bf16_v2bf16__3_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v2bf16__3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -2331,21 +2331,21 @@ define void @s_shuffle_v3bf16_v2bf16__3_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v2bf16__3_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -2389,22 +2389,22 @@ define void @s_shuffle_v3bf16_v2bf16__3_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v2bf16__3_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -2440,18 +2440,18 @@ define void @s_shuffle_v3bf16_v2bf16__3_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v2bf16__3_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -2487,18 +2487,18 @@ define void @s_shuffle_v3bf16_v2bf16__3_3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v2bf16__3_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -2540,21 +2540,21 @@ define void @s_shuffle_v3bf16_v2bf16__3_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v2bf16__3_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -2598,22 +2598,22 @@ define void @s_shuffle_v3bf16_v2bf16__3_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v2bf16__3_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -2649,18 +2649,18 @@ define void @s_shuffle_v3bf16_v2bf16__3_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v2bf16__3_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -2696,18 +2696,18 @@ define void @s_shuffle_v3bf16_v2bf16__3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v2bf16__3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -2781,18 +2781,18 @@ define void @s_shuffle_v3bf16_v2bf16__1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v2bf16__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> @@ -2852,21 +2852,21 @@ define void @s_shuffle_v3bf16_v2bf16__3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v2bf16__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -2906,20 +2906,20 @@ define void @s_shuffle_v3bf16_v2bf16__3_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v2bf16__3_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -2963,22 +2963,22 @@ define void @s_shuffle_v3bf16_v2bf16__3_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s9, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v2bf16__3_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s9, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -3020,21 +3020,21 @@ define void @s_shuffle_v3bf16_v2bf16__3_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v2bf16__3_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -3108,18 +3108,18 @@ define void @s_shuffle_v3bf16_v2bf16__1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v2bf16__1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> @@ -3181,22 +3181,22 @@ define void @s_shuffle_v3bf16_v2bf16__3_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v2bf16__3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -3238,21 +3238,21 @@ define void @s_shuffle_v3bf16_v2bf16__3_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v2bf16__3_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -3296,22 +3296,22 @@ define void @s_shuffle_v3bf16_v2bf16__3_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v2bf16__3_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -3355,22 +3355,22 @@ define void @s_shuffle_v3bf16_v2bf16__3_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v2bf16__3_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -3417,17 +3417,17 @@ define void @s_shuffle_v3bf16_v2bf16__0_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v2bf16__0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> @@ -3460,17 +3460,17 @@ define void @s_shuffle_v3bf16_v2bf16__1_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v2bf16__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <3 x i32> %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> @@ -3520,18 +3520,18 @@ define void @s_shuffle_v3bf16_v2bf16__3_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v2bf16__3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -3593,21 +3593,21 @@ define void @s_shuffle_v3bf16_v2bf16__3_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s9, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v2bf16__3_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s9, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -3651,22 +3651,22 @@ define void @s_shuffle_v3bf16_v2bf16__3_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v2bf16__3_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -3728,21 +3728,21 @@ define void @s_shuffle_v3bf16_v2bf16__0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v2bf16__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -3786,22 +3786,22 @@ define void @s_shuffle_v3bf16_v2bf16__1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v2bf16__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -3857,18 +3857,18 @@ define void @s_shuffle_v3bf16_v2bf16__3_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v2bf16__3_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -3910,21 +3910,21 @@ define void @s_shuffle_v3bf16_v2bf16__3_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v2bf16__3_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -3968,22 +3968,22 @@ define void @s_shuffle_v3bf16_v2bf16__3_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v2bf16__3_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> @@ -4019,18 +4019,18 @@ define void @s_shuffle_v3bf16_v2bf16__3_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v2bf16__3_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v2bf16__3_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <3 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3bf16.v3bf16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3bf16.v3bf16.ll index f32988d60edd8..99c9480adc410 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3bf16.v3bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3bf16.v3bf16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v3bf16_v3bf16__u_u_u(ptr addrspace(1) inreg %ptr) { @@ -41,17 +41,17 @@ define void @v_shuffle_v3bf16_v3bf16__0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> @@ -84,17 +84,17 @@ define void @v_shuffle_v3bf16_v3bf16__1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> @@ -125,16 +125,16 @@ define void @v_shuffle_v3bf16_v3bf16__2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> @@ -179,17 +179,17 @@ define void @v_shuffle_v3bf16_v3bf16__4_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -222,16 +222,16 @@ define void @v_shuffle_v3bf16_v3bf16__5_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -274,22 +274,22 @@ define void @v_shuffle_v3bf16_v3bf16__5_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -332,22 +332,22 @@ define void @v_shuffle_v3bf16_v3bf16__5_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -390,22 +390,22 @@ define void @v_shuffle_v3bf16_v3bf16__5_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -442,18 +442,18 @@ define void @v_shuffle_v3bf16_v3bf16__5_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -490,18 +490,18 @@ define void @v_shuffle_v3bf16_v3bf16__5_4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v0 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -538,18 +538,18 @@ define void @v_shuffle_v3bf16_v3bf16__5_5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -594,23 +594,23 @@ define void @v_shuffle_v3bf16_v3bf16__5_5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -655,23 +655,23 @@ define void @v_shuffle_v3bf16_v3bf16__5_5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -716,23 +716,23 @@ define void @v_shuffle_v3bf16_v3bf16__5_5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -771,19 +771,19 @@ define void @v_shuffle_v3bf16_v3bf16__5_5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -824,20 +824,20 @@ define void @v_shuffle_v3bf16_v3bf16__5_5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -876,19 +876,19 @@ define void @v_shuffle_v3bf16_v3bf16__5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -925,19 +925,19 @@ define void @v_shuffle_v3bf16_v3bf16__u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> @@ -974,19 +974,19 @@ define void @v_shuffle_v3bf16_v3bf16__0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> zeroinitializer @@ -1021,19 +1021,19 @@ define void @v_shuffle_v3bf16_v3bf16__1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v0, 16 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v0, 16 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> @@ -1070,19 +1070,19 @@ define void @v_shuffle_v3bf16_v3bf16__2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v0, v1, s2 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> @@ -1117,19 +1117,19 @@ define void @v_shuffle_v3bf16_v3bf16__3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> @@ -1170,22 +1170,22 @@ define void @v_shuffle_v3bf16_v3bf16__4_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v2, 16 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v2, 16 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -1230,23 +1230,23 @@ define void @v_shuffle_v3bf16_v3bf16__5_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v3, s2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -1287,20 +1287,20 @@ define void @v_shuffle_v3bf16_v3bf16__5_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v3, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -1345,23 +1345,23 @@ define void @v_shuffle_v3bf16_v3bf16__5_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v0 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v3, v0 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -1406,23 +1406,23 @@ define void @v_shuffle_v3bf16_v3bf16__5_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v3, s2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v3, s2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -1467,23 +1467,23 @@ define void @v_shuffle_v3bf16_v3bf16__5_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v3, s2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v3, s2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -1528,23 +1528,23 @@ define void @v_shuffle_v3bf16_v3bf16__5_4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v3, v2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -1581,18 +1581,18 @@ define void @v_shuffle_v3bf16_v3bf16__u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> @@ -1627,18 +1627,18 @@ define void @v_shuffle_v3bf16_v3bf16__0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> @@ -1677,20 +1677,20 @@ define void @v_shuffle_v3bf16_v3bf16__1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> @@ -1729,20 +1729,20 @@ define void @v_shuffle_v3bf16_v3bf16__2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v0 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v1, v0 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> @@ -1777,18 +1777,18 @@ define void @v_shuffle_v3bf16_v3bf16__3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> @@ -1833,24 +1833,24 @@ define void @v_shuffle_v3bf16_v3bf16__4_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v2, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v2, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -1897,24 +1897,24 @@ define void @v_shuffle_v3bf16_v3bf16__5_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v0 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v3, v0 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -1957,21 +1957,21 @@ define void @v_shuffle_v3bf16_v3bf16__5_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v3, s[0:1] +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -2018,24 +2018,24 @@ define void @v_shuffle_v3bf16_v3bf16__5_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v3, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -2082,24 +2082,24 @@ define void @v_shuffle_v3bf16_v3bf16__5_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v3, s2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v3, s2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -2144,23 +2144,23 @@ define void @v_shuffle_v3bf16_v3bf16__5_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v3, s2 -; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v3, s2 +; GFX942-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -2205,23 +2205,23 @@ define void @v_shuffle_v3bf16_v3bf16__5_4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v2 -; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v3, v2 +; GFX942-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -2258,19 +2258,19 @@ define void @v_shuffle_v3bf16_v3bf16__u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> @@ -2307,19 +2307,19 @@ define void @v_shuffle_v3bf16_v3bf16__0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> @@ -2354,19 +2354,19 @@ define void @v_shuffle_v3bf16_v3bf16__1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 16 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> @@ -2403,19 +2403,19 @@ define void @v_shuffle_v3bf16_v3bf16__2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> @@ -2450,19 +2450,19 @@ define void @v_shuffle_v3bf16_v3bf16__3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> @@ -2503,22 +2503,22 @@ define void @v_shuffle_v3bf16_v3bf16__4_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v2, 16 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v2, 16 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -2563,23 +2563,23 @@ define void @v_shuffle_v3bf16_v3bf16__5_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -2620,20 +2620,20 @@ define void @v_shuffle_v3bf16_v3bf16__5_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v3, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -2678,23 +2678,23 @@ define void @v_shuffle_v3bf16_v3bf16__5_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -2739,23 +2739,23 @@ define void @v_shuffle_v3bf16_v3bf16__5_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -2800,23 +2800,23 @@ define void @v_shuffle_v3bf16_v3bf16__5_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v3, s2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -2861,23 +2861,23 @@ define void @v_shuffle_v3bf16_v3bf16__5_4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -2924,17 +2924,17 @@ define void @v_shuffle_v3bf16_v3bf16__0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> @@ -2967,17 +2967,17 @@ define void @v_shuffle_v3bf16_v3bf16__1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> @@ -3008,16 +3008,16 @@ define void @v_shuffle_v3bf16_v3bf16__2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> @@ -3064,19 +3064,19 @@ define void @v_shuffle_v3bf16_v3bf16__4_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v0, 16 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v0, 16 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -3115,19 +3115,19 @@ define void @v_shuffle_v3bf16_v3bf16__5_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v0, v1, s2 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -3162,17 +3162,17 @@ define void @v_shuffle_v3bf16_v3bf16__5_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -3217,22 +3217,22 @@ define void @v_shuffle_v3bf16_v3bf16__5_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_short v4, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -3277,22 +3277,22 @@ define void @v_shuffle_v3bf16_v3bf16__5_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: global_store_short v4, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -3337,22 +3337,22 @@ define void @v_shuffle_v3bf16_v3bf16__5_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_short v4, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -3391,19 +3391,19 @@ define void @v_shuffle_v3bf16_v3bf16__5_4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_bfi_b32 v1, s2, v1, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -3440,18 +3440,18 @@ define void @v_shuffle_v3bf16_v3bf16__u_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -3498,24 +3498,24 @@ define void @v_shuffle_v3bf16_v3bf16__0_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -3562,24 +3562,24 @@ define void @v_shuffle_v3bf16_v3bf16__1_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -3626,24 +3626,24 @@ define void @v_shuffle_v3bf16_v3bf16__2_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -3680,18 +3680,18 @@ define void @v_shuffle_v3bf16_v3bf16__3_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -3732,20 +3732,20 @@ define void @v_shuffle_v3bf16_v3bf16__4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -3786,20 +3786,20 @@ define void @v_shuffle_v3bf16_v3bf16__5_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v0 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v1, v0 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -3836,18 +3836,18 @@ define void @v_shuffle_v3bf16_v3bf16__5_u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -3894,23 +3894,23 @@ define void @v_shuffle_v3bf16_v3bf16__5_0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v2 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -3957,23 +3957,23 @@ define void @v_shuffle_v3bf16_v3bf16__5_1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v2 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4020,24 +4020,24 @@ define void @v_shuffle_v3bf16_v3bf16__5_2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4078,20 +4078,20 @@ define void @v_shuffle_v3bf16_v3bf16__5_3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v1, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4128,19 +4128,19 @@ define void @v_shuffle_v3bf16_v3bf16__u_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4185,22 +4185,22 @@ define void @v_shuffle_v3bf16_v3bf16__0_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4243,22 +4243,22 @@ define void @v_shuffle_v3bf16_v3bf16__1_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v0, 16 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4303,22 +4303,22 @@ define void @v_shuffle_v3bf16_v3bf16__2_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4357,19 +4357,19 @@ define void @v_shuffle_v3bf16_v3bf16__3_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4406,19 +4406,19 @@ define void @v_shuffle_v3bf16_v3bf16__4_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 16 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4453,17 +4453,17 @@ define void @v_shuffle_v3bf16_v3bf16__5_u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4508,22 +4508,22 @@ define void @v_shuffle_v3bf16_v3bf16__5_0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4568,22 +4568,22 @@ define void @v_shuffle_v3bf16_v3bf16__5_1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4628,22 +4628,22 @@ define void @v_shuffle_v3bf16_v3bf16__5_2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4682,19 +4682,19 @@ define void @v_shuffle_v3bf16_v3bf16__5_3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4733,19 +4733,19 @@ define void @v_shuffle_v3bf16_v3bf16__5_4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v3bf16__5_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v3bf16__5_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v0 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4794,17 +4794,17 @@ define void @s_shuffle_v3bf16_v3bf16__0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> @@ -4838,17 +4838,17 @@ define void @s_shuffle_v3bf16_v3bf16__1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> @@ -4882,17 +4882,17 @@ define void @s_shuffle_v3bf16_v3bf16__2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> @@ -4942,17 +4942,17 @@ define void @s_shuffle_v3bf16_v3bf16__4_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4988,17 +4988,17 @@ define void @s_shuffle_v3bf16_v3bf16__5_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -5040,20 +5040,20 @@ define void @s_shuffle_v3bf16_v3bf16__5_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -5097,21 +5097,21 @@ define void @s_shuffle_v3bf16_v3bf16__5_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -5153,20 +5153,20 @@ define void @s_shuffle_v3bf16_v3bf16__5_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -5202,17 +5202,17 @@ define void @s_shuffle_v3bf16_v3bf16__5_3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -5250,18 +5250,18 @@ define void @s_shuffle_v3bf16_v3bf16__5_4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -5297,17 +5297,17 @@ define void @s_shuffle_v3bf16_v3bf16__5_5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -5351,21 +5351,21 @@ define void @s_shuffle_v3bf16_v3bf16__5_5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -5409,21 +5409,21 @@ define void @s_shuffle_v3bf16_v3bf16__5_5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -5465,20 +5465,20 @@ define void @s_shuffle_v3bf16_v3bf16__5_5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -5516,18 +5516,18 @@ define void @s_shuffle_v3bf16_v3bf16__5_5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -5565,18 +5565,18 @@ define void @s_shuffle_v3bf16_v3bf16__5_5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -5636,18 +5636,18 @@ define void @s_shuffle_v3bf16_v3bf16__u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> @@ -5683,18 +5683,18 @@ define void @s_shuffle_v3bf16_v3bf16__0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> zeroinitializer @@ -5732,19 +5732,19 @@ define void @s_shuffle_v3bf16_v3bf16__1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> @@ -5780,18 +5780,18 @@ define void @s_shuffle_v3bf16_v3bf16__2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> @@ -5827,18 +5827,18 @@ define void @s_shuffle_v3bf16_v3bf16__3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> @@ -5882,22 +5882,22 @@ define void @s_shuffle_v3bf16_v3bf16__4_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -5941,21 +5941,21 @@ define void @s_shuffle_v3bf16_v3bf16__5_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -5999,21 +5999,21 @@ define void @s_shuffle_v3bf16_v3bf16__5_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -6059,22 +6059,22 @@ define void @s_shuffle_v3bf16_v3bf16__5_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -6118,21 +6118,21 @@ define void @s_shuffle_v3bf16_v3bf16__5_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -6176,21 +6176,21 @@ define void @s_shuffle_v3bf16_v3bf16__5_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -6236,22 +6236,22 @@ define void @s_shuffle_v3bf16_v3bf16__5_4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -6329,18 +6329,18 @@ define void @s_shuffle_v3bf16_v3bf16__1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> @@ -6376,18 +6376,18 @@ define void @s_shuffle_v3bf16_v3bf16__2_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> @@ -6451,22 +6451,22 @@ define void @s_shuffle_v3bf16_v3bf16__4_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -6510,21 +6510,21 @@ define void @s_shuffle_v3bf16_v3bf16__5_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -6568,21 +6568,21 @@ define void @s_shuffle_v3bf16_v3bf16__5_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -6626,21 +6626,21 @@ define void @s_shuffle_v3bf16_v3bf16__5_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -6684,21 +6684,21 @@ define void @s_shuffle_v3bf16_v3bf16__5_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -6742,21 +6742,21 @@ define void @s_shuffle_v3bf16_v3bf16__5_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s2 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s2 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -6802,22 +6802,22 @@ define void @s_shuffle_v3bf16_v3bf16__5_4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -6895,18 +6895,18 @@ define void @s_shuffle_v3bf16_v3bf16__1_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> @@ -6988,21 +6988,21 @@ define void @s_shuffle_v3bf16_v3bf16__4_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -7044,20 +7044,20 @@ define void @s_shuffle_v3bf16_v3bf16__5_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -7099,20 +7099,20 @@ define void @s_shuffle_v3bf16_v3bf16__5_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -7154,20 +7154,20 @@ define void @s_shuffle_v3bf16_v3bf16__5_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -7211,21 +7211,21 @@ define void @s_shuffle_v3bf16_v3bf16__5_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -7267,20 +7267,20 @@ define void @s_shuffle_v3bf16_v3bf16__5_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -7324,21 +7324,21 @@ define void @s_shuffle_v3bf16_v3bf16__5_4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -7388,17 +7388,17 @@ define void @s_shuffle_v3bf16_v3bf16__0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> @@ -7432,17 +7432,17 @@ define void @s_shuffle_v3bf16_v3bf16__1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> @@ -7476,17 +7476,17 @@ define void @s_shuffle_v3bf16_v3bf16__2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <3 x i32> @@ -7540,19 +7540,19 @@ define void @s_shuffle_v3bf16_v3bf16__4_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -7590,18 +7590,18 @@ define void @s_shuffle_v3bf16_v3bf16__5_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -7639,18 +7639,18 @@ define void @s_shuffle_v3bf16_v3bf16__5_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -7694,21 +7694,21 @@ define void @s_shuffle_v3bf16_v3bf16__5_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -7754,22 +7754,22 @@ define void @s_shuffle_v3bf16_v3bf16__5_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -7813,21 +7813,21 @@ define void @s_shuffle_v3bf16_v3bf16__5_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -7867,19 +7867,19 @@ define void @s_shuffle_v3bf16_v3bf16__5_4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -7945,21 +7945,21 @@ define void @s_shuffle_v3bf16_v3bf16__0_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -8005,22 +8005,22 @@ define void @s_shuffle_v3bf16_v3bf16__1_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -8064,21 +8064,21 @@ define void @s_shuffle_v3bf16_v3bf16__2_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -8138,18 +8138,18 @@ define void @s_shuffle_v3bf16_v3bf16__4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -8187,18 +8187,18 @@ define void @s_shuffle_v3bf16_v3bf16__5_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -8236,18 +8236,18 @@ define void @s_shuffle_v3bf16_v3bf16__5_u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -8291,21 +8291,21 @@ define void @s_shuffle_v3bf16_v3bf16__5_0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -8351,22 +8351,22 @@ define void @s_shuffle_v3bf16_v3bf16__5_1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -8410,21 +8410,21 @@ define void @s_shuffle_v3bf16_v3bf16__5_2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -8462,18 +8462,18 @@ define void @s_shuffle_v3bf16_v3bf16__5_3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -8537,20 +8537,20 @@ define void @s_shuffle_v3bf16_v3bf16__0_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -8594,21 +8594,21 @@ define void @s_shuffle_v3bf16_v3bf16__1_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -8650,20 +8650,20 @@ define void @s_shuffle_v3bf16_v3bf16__2_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -8723,18 +8723,18 @@ define void @s_shuffle_v3bf16_v3bf16__4_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -8798,20 +8798,20 @@ define void @s_shuffle_v3bf16_v3bf16__5_0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -8855,21 +8855,21 @@ define void @s_shuffle_v3bf16_v3bf16__5_1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -8911,20 +8911,20 @@ define void @s_shuffle_v3bf16_v3bf16__5_2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -8984,18 +8984,18 @@ define void @s_shuffle_v3bf16_v3bf16__5_4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v3bf16__5_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v3bf16__5_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3bf16.v4bf16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3bf16.v4bf16.ll index 5ce0252e0a9aa..311ca98227da3 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3bf16.v4bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3bf16.v4bf16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v3bf16_v4bf16__u_u_u(ptr addrspace(1) inreg %ptr) { @@ -40,17 +40,17 @@ define void @v_shuffle_v3bf16_v4bf16__0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -82,17 +82,17 @@ define void @v_shuffle_v3bf16_v4bf16__1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -122,16 +122,16 @@ define void @v_shuffle_v3bf16_v4bf16__2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -163,17 +163,17 @@ define void @v_shuffle_v3bf16_v4bf16__3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -216,17 +216,17 @@ define void @v_shuffle_v3bf16_v4bf16__5_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -257,16 +257,16 @@ define void @v_shuffle_v3bf16_v4bf16__6_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__6_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__6_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -299,17 +299,17 @@ define void @v_shuffle_v3bf16_v4bf16__7_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -348,21 +348,21 @@ define void @v_shuffle_v3bf16_v4bf16__7_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -403,22 +403,22 @@ define void @v_shuffle_v3bf16_v4bf16__7_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -457,21 +457,21 @@ define void @v_shuffle_v3bf16_v4bf16__7_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -512,22 +512,22 @@ define void @v_shuffle_v3bf16_v4bf16__7_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -560,17 +560,17 @@ define void @v_shuffle_v3bf16_v4bf16__7_4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v1, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -605,18 +605,18 @@ define void @v_shuffle_v3bf16_v4bf16__7_5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -649,17 +649,17 @@ define void @v_shuffle_v3bf16_v4bf16__7_6_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v1, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -694,18 +694,18 @@ define void @v_shuffle_v3bf16_v4bf16__7_7_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -748,23 +748,23 @@ define void @v_shuffle_v3bf16_v4bf16__7_7_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -807,23 +807,23 @@ define void @v_shuffle_v3bf16_v4bf16__7_7_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -866,23 +866,23 @@ define void @v_shuffle_v3bf16_v4bf16__7_7_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -925,23 +925,23 @@ define void @v_shuffle_v3bf16_v4bf16__7_7_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -978,19 +978,19 @@ define void @v_shuffle_v3bf16_v4bf16__7_7_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -1027,19 +1027,19 @@ define void @v_shuffle_v3bf16_v4bf16__7_7_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -1076,19 +1076,19 @@ define void @v_shuffle_v3bf16_v4bf16__7_7_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -1127,20 +1127,20 @@ define void @v_shuffle_v3bf16_v4bf16__7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -1175,19 +1175,19 @@ define void @v_shuffle_v3bf16_v4bf16__u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1223,19 +1223,19 @@ define void @v_shuffle_v3bf16_v4bf16__0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> zeroinitializer store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1269,19 +1269,19 @@ define void @v_shuffle_v3bf16_v4bf16__1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v0, 16 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v0, 16 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1317,19 +1317,19 @@ define void @v_shuffle_v3bf16_v4bf16__2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v0, v1, s2 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1363,19 +1363,19 @@ define void @v_shuffle_v3bf16_v4bf16__3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v1, 16 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v1, 16 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1409,19 +1409,19 @@ define void @v_shuffle_v3bf16_v4bf16__4_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1461,22 +1461,22 @@ define void @v_shuffle_v3bf16_v4bf16__5_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v2, 16 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v2, 16 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -1519,23 +1519,23 @@ define void @v_shuffle_v3bf16_v4bf16__6_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__6_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__6_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v3, s2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -1576,22 +1576,22 @@ define void @v_shuffle_v3bf16_v4bf16__7_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v3, 16 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v3, 16 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -1632,22 +1632,22 @@ define void @v_shuffle_v3bf16_v4bf16__7_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, s0, v3, 16 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, s0, v3, 16 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -1690,23 +1690,23 @@ define void @v_shuffle_v3bf16_v4bf16__7_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v3, s2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -1747,22 +1747,22 @@ define void @v_shuffle_v3bf16_v4bf16__7_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v3, 16 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v3, 16 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -1805,23 +1805,23 @@ define void @v_shuffle_v3bf16_v4bf16__7_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v3, s2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v3, s2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -1862,22 +1862,22 @@ define void @v_shuffle_v3bf16_v4bf16__7_4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v2, v3, 16 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v2, v3, 16 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -1920,23 +1920,23 @@ define void @v_shuffle_v3bf16_v4bf16__7_5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v3, s2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v3, s2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -1977,22 +1977,22 @@ define void @v_shuffle_v3bf16_v4bf16__7_6_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v3, v3, 16 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v3, v3, 16 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -2027,18 +2027,18 @@ define void @v_shuffle_v3bf16_v4bf16__u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2072,18 +2072,18 @@ define void @v_shuffle_v3bf16_v4bf16__0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2121,20 +2121,20 @@ define void @v_shuffle_v3bf16_v4bf16__1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2172,20 +2172,20 @@ define void @v_shuffle_v3bf16_v4bf16__2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v0 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v1, v0 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2223,20 +2223,20 @@ define void @v_shuffle_v3bf16_v4bf16__3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v1, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2270,18 +2270,18 @@ define void @v_shuffle_v3bf16_v4bf16__4_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2325,24 +2325,24 @@ define void @v_shuffle_v3bf16_v4bf16__5_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v2, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v2, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -2387,24 +2387,24 @@ define void @v_shuffle_v3bf16_v4bf16__6_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__6_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v0 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__6_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v3, v0 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -2449,24 +2449,24 @@ define void @v_shuffle_v3bf16_v4bf16__7_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v3, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -2507,22 +2507,22 @@ define void @v_shuffle_v3bf16_v4bf16__7_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, s0, v3, 16 -; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, s0, v3, 16 +; GFX942-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -2563,22 +2563,22 @@ define void @v_shuffle_v3bf16_v4bf16__7_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v3, 16 -; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v3, 16 +; GFX942-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -2619,22 +2619,22 @@ define void @v_shuffle_v3bf16_v4bf16__7_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v3, 16 -; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v3, 16 +; GFX942-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -2677,23 +2677,23 @@ define void @v_shuffle_v3bf16_v4bf16__7_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v3, s2 -; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v3, s2 +; GFX942-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -2734,22 +2734,22 @@ define void @v_shuffle_v3bf16_v4bf16__7_4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v2, v3, 16 -; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v2, v3, 16 +; GFX942-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -2792,23 +2792,23 @@ define void @v_shuffle_v3bf16_v4bf16__7_5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v3, s2 -; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v3, s2 +; GFX942-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -2849,22 +2849,22 @@ define void @v_shuffle_v3bf16_v4bf16__7_6_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v3, v3, 16 -; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v3, v3, 16 +; GFX942-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -2899,19 +2899,19 @@ define void @v_shuffle_v3bf16_v4bf16__u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2947,19 +2947,19 @@ define void @v_shuffle_v3bf16_v4bf16__0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2993,19 +2993,19 @@ define void @v_shuffle_v3bf16_v4bf16__1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 16 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3041,19 +3041,19 @@ define void @v_shuffle_v3bf16_v4bf16__2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3087,19 +3087,19 @@ define void @v_shuffle_v3bf16_v4bf16__3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v1, 16 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3133,19 +3133,19 @@ define void @v_shuffle_v3bf16_v4bf16__4_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3185,22 +3185,22 @@ define void @v_shuffle_v3bf16_v4bf16__5_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v2, 16 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v2, 16 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -3243,23 +3243,23 @@ define void @v_shuffle_v3bf16_v4bf16__6_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__6_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__6_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -3300,22 +3300,22 @@ define void @v_shuffle_v3bf16_v4bf16__7_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v3, 16 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -3356,22 +3356,22 @@ define void @v_shuffle_v3bf16_v4bf16__7_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v3, 16 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -3412,22 +3412,22 @@ define void @v_shuffle_v3bf16_v4bf16__7_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -3470,23 +3470,23 @@ define void @v_shuffle_v3bf16_v4bf16__7_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -3529,23 +3529,23 @@ define void @v_shuffle_v3bf16_v4bf16__7_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -3586,22 +3586,22 @@ define void @v_shuffle_v3bf16_v4bf16__7_4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v3, 16 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v3, 16 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -3644,23 +3644,23 @@ define void @v_shuffle_v3bf16_v4bf16__7_5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v3, s2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -3701,22 +3701,22 @@ define void @v_shuffle_v3bf16_v4bf16__7_6_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v3, 16 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -3755,20 +3755,20 @@ define void @v_shuffle_v3bf16_v4bf16__u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v1 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3806,20 +3806,20 @@ define void @v_shuffle_v3bf16_v4bf16__0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v1 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3857,20 +3857,20 @@ define void @v_shuffle_v3bf16_v4bf16__1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3902,17 +3902,17 @@ define void @v_shuffle_v3bf16_v4bf16__2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short_d16_hi v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short_d16_hi v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3950,20 +3950,20 @@ define void @v_shuffle_v3bf16_v4bf16__3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4001,20 +4001,20 @@ define void @v_shuffle_v3bf16_v4bf16__4_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v1 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4058,24 +4058,24 @@ define void @v_shuffle_v3bf16_v4bf16__5_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v2, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v2, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -4120,24 +4120,24 @@ define void @v_shuffle_v3bf16_v4bf16__6_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__6_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v1 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__6_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v1 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -4182,24 +4182,24 @@ define void @v_shuffle_v3bf16_v4bf16__7_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -4240,22 +4240,22 @@ define void @v_shuffle_v3bf16_v4bf16__7_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16 -; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v3, 16 +; GFX942-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -4296,22 +4296,22 @@ define void @v_shuffle_v3bf16_v4bf16__7_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -4354,23 +4354,23 @@ define void @v_shuffle_v3bf16_v4bf16__7_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -4411,22 +4411,22 @@ define void @v_shuffle_v3bf16_v4bf16__7_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 -; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v3, 16 +; GFX942-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -4467,22 +4467,22 @@ define void @v_shuffle_v3bf16_v4bf16__7_4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v3, 16 -; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v3, 16 +; GFX942-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -4525,23 +4525,23 @@ define void @v_shuffle_v3bf16_v4bf16__7_5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 -; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v3, s2 +; GFX942-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -4582,22 +4582,22 @@ define void @v_shuffle_v3bf16_v4bf16__7_6_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 -; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v3, 16 +; GFX942-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -4641,17 +4641,17 @@ define void @v_shuffle_v3bf16_v4bf16__0_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4683,17 +4683,17 @@ define void @v_shuffle_v3bf16_v4bf16__1_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4723,16 +4723,16 @@ define void @v_shuffle_v3bf16_v4bf16__2_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4764,17 +4764,17 @@ define void @v_shuffle_v3bf16_v4bf16__3_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> store <3 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4819,19 +4819,19 @@ define void @v_shuffle_v3bf16_v4bf16__5_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v0, 16 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v0, 16 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -4868,19 +4868,19 @@ define void @v_shuffle_v3bf16_v4bf16__6_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__6_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__6_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v0, v1, s2 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -4915,19 +4915,19 @@ define void @v_shuffle_v3bf16_v4bf16__7_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v1, 16 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v1, 16 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -4962,19 +4962,19 @@ define void @v_shuffle_v3bf16_v4bf16__7_u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, s0, v1, 16 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -5015,22 +5015,22 @@ define void @v_shuffle_v3bf16_v4bf16__7_0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_short v4, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -5073,22 +5073,22 @@ define void @v_shuffle_v3bf16_v4bf16__7_1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_short v4, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -5129,22 +5129,22 @@ define void @v_shuffle_v3bf16_v4bf16__7_2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 -; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v3, 16 +; GFX942-NEXT: global_store_short v4, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -5187,22 +5187,22 @@ define void @v_shuffle_v3bf16_v4bf16__7_3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_short v4, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -5239,19 +5239,19 @@ define void @v_shuffle_v3bf16_v4bf16__7_5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v0, v1, s2 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -5286,19 +5286,19 @@ define void @v_shuffle_v3bf16_v4bf16__7_6_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v1, 16 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -5333,18 +5333,18 @@ define void @v_shuffle_v3bf16_v4bf16__u_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -5389,24 +5389,24 @@ define void @v_shuffle_v3bf16_v4bf16__0_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -5451,24 +5451,24 @@ define void @v_shuffle_v3bf16_v4bf16__1_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -5513,24 +5513,24 @@ define void @v_shuffle_v3bf16_v4bf16__2_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -5575,24 +5575,24 @@ define void @v_shuffle_v3bf16_v4bf16__3_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v1, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v1, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -5627,18 +5627,18 @@ define void @v_shuffle_v3bf16_v4bf16__4_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -5677,20 +5677,20 @@ define void @v_shuffle_v3bf16_v4bf16__5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -5729,20 +5729,20 @@ define void @v_shuffle_v3bf16_v4bf16__6_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__6_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v0 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__6_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v1, v0 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -5781,20 +5781,20 @@ define void @v_shuffle_v3bf16_v4bf16__7_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v1, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -5829,19 +5829,19 @@ define void @v_shuffle_v3bf16_v4bf16__7_u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16 -; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, s0, v1, 16 +; GFX942-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -5882,22 +5882,22 @@ define void @v_shuffle_v3bf16_v4bf16__7_0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -5940,22 +5940,22 @@ define void @v_shuffle_v3bf16_v4bf16__7_1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -5996,22 +5996,22 @@ define void @v_shuffle_v3bf16_v4bf16__7_2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 -; GFX940-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v3, 16 +; GFX942-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -6054,22 +6054,22 @@ define void @v_shuffle_v3bf16_v4bf16__7_3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -6104,19 +6104,19 @@ define void @v_shuffle_v3bf16_v4bf16__7_4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v1, 16 -; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v1, 16 +; GFX942-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -6151,19 +6151,19 @@ define void @v_shuffle_v3bf16_v4bf16__7_6_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16 -; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v1, 16 +; GFX942-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -6198,19 +6198,19 @@ define void @v_shuffle_v3bf16_v4bf16__u_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__u_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__u_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -6253,22 +6253,22 @@ define void @v_shuffle_v3bf16_v4bf16__0_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__0_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__0_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -6309,22 +6309,22 @@ define void @v_shuffle_v3bf16_v4bf16__1_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__1_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__1_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v0, 16 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -6367,22 +6367,22 @@ define void @v_shuffle_v3bf16_v4bf16__2_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__2_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__2_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -6423,22 +6423,22 @@ define void @v_shuffle_v3bf16_v4bf16__3_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__3_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v1, 16 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__3_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v1, 16 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -6475,19 +6475,19 @@ define void @v_shuffle_v3bf16_v4bf16__4_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__4_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__4_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -6522,19 +6522,19 @@ define void @v_shuffle_v3bf16_v4bf16__5_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__5_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__5_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 16 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -6571,19 +6571,19 @@ define void @v_shuffle_v3bf16_v4bf16__6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -6618,19 +6618,19 @@ define void @v_shuffle_v3bf16_v4bf16__7_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v1, 16 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -6665,19 +6665,19 @@ define void @v_shuffle_v3bf16_v4bf16__7_u_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -6718,22 +6718,22 @@ define void @v_shuffle_v3bf16_v4bf16__7_0_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -6776,22 +6776,22 @@ define void @v_shuffle_v3bf16_v4bf16__7_1_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -6832,22 +6832,22 @@ define void @v_shuffle_v3bf16_v4bf16__7_2_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v3, 16 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -6890,22 +6890,22 @@ define void @v_shuffle_v3bf16_v4bf16__7_3_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -6940,19 +6940,19 @@ define void @v_shuffle_v3bf16_v4bf16__7_4_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v1, 16 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -6989,19 +6989,19 @@ define void @v_shuffle_v3bf16_v4bf16__7_5_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -7040,20 +7040,20 @@ define void @v_shuffle_v3bf16_v4bf16__u_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__u_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__u_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v1 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -7098,24 +7098,24 @@ define void @v_shuffle_v3bf16_v4bf16__0_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__0_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v3 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__0_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v3 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -7160,24 +7160,24 @@ define void @v_shuffle_v3bf16_v4bf16__1_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__1_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__1_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -7222,24 +7222,24 @@ define void @v_shuffle_v3bf16_v4bf16__2_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__2_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v3 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__2_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v3 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v3 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -7284,24 +7284,24 @@ define void @v_shuffle_v3bf16_v4bf16__3_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__3_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__3_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -7340,20 +7340,20 @@ define void @v_shuffle_v3bf16_v4bf16__4_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__4_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__4_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v1 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -7392,20 +7392,20 @@ define void @v_shuffle_v3bf16_v4bf16__5_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__5_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__5_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -7438,17 +7438,17 @@ define void @v_shuffle_v3bf16_v4bf16__6_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__6_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short_d16_hi v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__6_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short_d16_hi v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -7485,19 +7485,19 @@ define void @v_shuffle_v3bf16_v4bf16__7_u_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: v_alignbit_b32 v1, s0, v1, 16 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -7540,23 +7540,23 @@ define void @v_shuffle_v3bf16_v4bf16__7_0_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -7601,24 +7601,24 @@ define void @v_shuffle_v3bf16_v4bf16__7_1_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -7661,23 +7661,23 @@ define void @v_shuffle_v3bf16_v4bf16__7_2_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v3, 16 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v3 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -7722,24 +7722,24 @@ define void @v_shuffle_v3bf16_v4bf16__7_3_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -7776,19 +7776,19 @@ define void @v_shuffle_v3bf16_v4bf16__7_4_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16 -; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v1, 16 +; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: global_store_short v2, v3, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -7827,20 +7827,20 @@ define void @v_shuffle_v3bf16_v4bf16__7_5_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -7877,19 +7877,19 @@ define void @v_shuffle_v3bf16_v4bf16__7_6_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3bf16_v4bf16__7_6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3bf16_v4bf16__7_6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v1, 16 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -7935,17 +7935,17 @@ define void @s_shuffle_v3bf16_v4bf16__0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> @@ -7978,17 +7978,17 @@ define void @s_shuffle_v3bf16_v4bf16__1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> @@ -8021,17 +8021,17 @@ define void @s_shuffle_v3bf16_v4bf16__2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> @@ -8064,17 +8064,17 @@ define void @s_shuffle_v3bf16_v4bf16__3_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> @@ -8122,17 +8122,17 @@ define void @s_shuffle_v3bf16_v4bf16__5_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -8166,17 +8166,17 @@ define void @s_shuffle_v3bf16_v4bf16__6_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__6_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__6_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -8210,17 +8210,17 @@ define void @s_shuffle_v3bf16_v4bf16__7_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -8262,21 +8262,21 @@ define void @s_shuffle_v3bf16_v4bf16__7_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -8320,22 +8320,22 @@ define void @s_shuffle_v3bf16_v4bf16__7_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -8377,21 +8377,21 @@ define void @s_shuffle_v3bf16_v4bf16__7_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -8435,22 +8435,22 @@ define void @s_shuffle_v3bf16_v4bf16__7_3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -8486,18 +8486,18 @@ define void @s_shuffle_v3bf16_v4bf16__7_4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -8535,19 +8535,19 @@ define void @s_shuffle_v3bf16_v4bf16__7_5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -8583,18 +8583,18 @@ define void @s_shuffle_v3bf16_v4bf16__7_6_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -8630,18 +8630,18 @@ define void @s_shuffle_v3bf16_v4bf16__7_7_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -8685,22 +8685,22 @@ define void @s_shuffle_v3bf16_v4bf16__7_7_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -8744,22 +8744,22 @@ define void @s_shuffle_v3bf16_v4bf16__7_7_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -8801,21 +8801,21 @@ define void @s_shuffle_v3bf16_v4bf16__7_7_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -8859,22 +8859,22 @@ define void @s_shuffle_v3bf16_v4bf16__7_7_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -8912,19 +8912,19 @@ define void @s_shuffle_v3bf16_v4bf16__7_7_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -8962,19 +8962,19 @@ define void @s_shuffle_v3bf16_v4bf16__7_7_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -9010,18 +9010,18 @@ define void @s_shuffle_v3bf16_v4bf16__7_7_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -9057,18 +9057,18 @@ define void @s_shuffle_v3bf16_v4bf16__7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -9104,18 +9104,18 @@ define void @s_shuffle_v3bf16_v4bf16__u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> @@ -9150,18 +9150,18 @@ define void @s_shuffle_v3bf16_v4bf16__0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> zeroinitializer %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> @@ -9198,19 +9198,19 @@ define void @s_shuffle_v3bf16_v4bf16__1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> @@ -9245,18 +9245,18 @@ define void @s_shuffle_v3bf16_v4bf16__2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> @@ -9293,19 +9293,19 @@ define void @s_shuffle_v3bf16_v4bf16__3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> @@ -9340,18 +9340,18 @@ define void @s_shuffle_v3bf16_v4bf16__4_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> @@ -9394,22 +9394,22 @@ define void @s_shuffle_v3bf16_v4bf16__5_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -9451,21 +9451,21 @@ define void @s_shuffle_v3bf16_v4bf16__6_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__6_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__6_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -9509,22 +9509,22 @@ define void @s_shuffle_v3bf16_v4bf16__7_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -9566,21 +9566,21 @@ define void @s_shuffle_v3bf16_v4bf16__7_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s3, 16 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s3, 16 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -9626,23 +9626,23 @@ define void @s_shuffle_v3bf16_v4bf16__7_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -9686,22 +9686,22 @@ define void @s_shuffle_v3bf16_v4bf16__7_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -9747,23 +9747,23 @@ define void @s_shuffle_v3bf16_v4bf16__7_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -9807,22 +9807,22 @@ define void @s_shuffle_v3bf16_v4bf16__7_4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -9868,23 +9868,23 @@ define void @s_shuffle_v3bf16_v4bf16__7_5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -9928,22 +9928,22 @@ define void @s_shuffle_v3bf16_v4bf16__7_6_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -10017,18 +10017,18 @@ define void @s_shuffle_v3bf16_v4bf16__1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> @@ -10063,18 +10063,18 @@ define void @s_shuffle_v3bf16_v4bf16__2_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> @@ -10111,19 +10111,19 @@ define void @s_shuffle_v3bf16_v4bf16__3_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> @@ -10185,22 +10185,22 @@ define void @s_shuffle_v3bf16_v4bf16__5_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -10242,21 +10242,21 @@ define void @s_shuffle_v3bf16_v4bf16__6_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__6_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__6_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -10300,22 +10300,22 @@ define void @s_shuffle_v3bf16_v4bf16__7_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -10357,21 +10357,21 @@ define void @s_shuffle_v3bf16_v4bf16__7_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_lshr_b32 s8, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_lshr_b32 s8, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -10415,22 +10415,22 @@ define void @s_shuffle_v3bf16_v4bf16__7_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -10474,22 +10474,22 @@ define void @s_shuffle_v3bf16_v4bf16__7_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -10535,23 +10535,23 @@ define void @s_shuffle_v3bf16_v4bf16__7_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -10595,22 +10595,22 @@ define void @s_shuffle_v3bf16_v4bf16__7_4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s2 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -10656,23 +10656,23 @@ define void @s_shuffle_v3bf16_v4bf16__7_5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -10716,22 +10716,22 @@ define void @s_shuffle_v3bf16_v4bf16__7_6_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s3 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -10805,18 +10805,18 @@ define void @s_shuffle_v3bf16_v4bf16__1_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> @@ -10870,18 +10870,18 @@ define void @s_shuffle_v3bf16_v4bf16__3_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> @@ -10941,21 +10941,21 @@ define void @s_shuffle_v3bf16_v4bf16__5_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -10995,20 +10995,20 @@ define void @s_shuffle_v3bf16_v4bf16__6_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__6_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__6_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -11050,21 +11050,21 @@ define void @s_shuffle_v3bf16_v4bf16__7_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -11104,20 +11104,20 @@ define void @s_shuffle_v3bf16_v4bf16__7_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -11159,21 +11159,21 @@ define void @s_shuffle_v3bf16_v4bf16__7_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -11217,22 +11217,22 @@ define void @s_shuffle_v3bf16_v4bf16__7_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -11276,22 +11276,22 @@ define void @s_shuffle_v3bf16_v4bf16__7_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -11333,21 +11333,21 @@ define void @s_shuffle_v3bf16_v4bf16__7_4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -11391,22 +11391,22 @@ define void @s_shuffle_v3bf16_v4bf16__7_5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -11448,21 +11448,21 @@ define void @s_shuffle_v3bf16_v4bf16__7_6_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -11498,18 +11498,18 @@ define void @s_shuffle_v3bf16_v4bf16__u_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> @@ -11544,18 +11544,18 @@ define void @s_shuffle_v3bf16_v4bf16__0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> @@ -11592,19 +11592,19 @@ define void @s_shuffle_v3bf16_v4bf16__1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> @@ -11639,18 +11639,18 @@ define void @s_shuffle_v3bf16_v4bf16__2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> @@ -11685,18 +11685,18 @@ define void @s_shuffle_v3bf16_v4bf16__3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> @@ -11731,18 +11731,18 @@ define void @s_shuffle_v3bf16_v4bf16__4_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> @@ -11785,22 +11785,22 @@ define void @s_shuffle_v3bf16_v4bf16__5_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -11842,21 +11842,21 @@ define void @s_shuffle_v3bf16_v4bf16__6_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__6_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__6_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -11900,22 +11900,22 @@ define void @s_shuffle_v3bf16_v4bf16__7_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -11957,21 +11957,21 @@ define void @s_shuffle_v3bf16_v4bf16__7_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_lshr_b32 s8, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_lshr_b32 s8, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -12015,22 +12015,22 @@ define void @s_shuffle_v3bf16_v4bf16__7_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -12076,23 +12076,23 @@ define void @s_shuffle_v3bf16_v4bf16__7_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -12136,22 +12136,22 @@ define void @s_shuffle_v3bf16_v4bf16__7_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -12195,22 +12195,22 @@ define void @s_shuffle_v3bf16_v4bf16__7_4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -12256,23 +12256,23 @@ define void @s_shuffle_v3bf16_v4bf16__7_5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -12316,22 +12316,22 @@ define void @s_shuffle_v3bf16_v4bf16__7_6_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -12378,17 +12378,17 @@ define void @s_shuffle_v3bf16_v4bf16__0_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> @@ -12421,17 +12421,17 @@ define void @s_shuffle_v3bf16_v4bf16__1_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> @@ -12464,17 +12464,17 @@ define void @s_shuffle_v3bf16_v4bf16__2_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> @@ -12507,17 +12507,17 @@ define void @s_shuffle_v3bf16_v4bf16__3_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %extend3 = shufflevector <3 x bfloat> %shuf, <3 x bfloat> poison, <4 x i32> @@ -12569,19 +12569,19 @@ define void @s_shuffle_v3bf16_v4bf16__5_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -12617,18 +12617,18 @@ define void @s_shuffle_v3bf16_v4bf16__6_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__6_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__6_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -12666,19 +12666,19 @@ define void @s_shuffle_v3bf16_v4bf16__7_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -12714,18 +12714,18 @@ define void @s_shuffle_v3bf16_v4bf16__7_u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -12769,22 +12769,22 @@ define void @s_shuffle_v3bf16_v4bf16__7_0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -12830,23 +12830,23 @@ define void @s_shuffle_v3bf16_v4bf16__7_1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -12890,22 +12890,22 @@ define void @s_shuffle_v3bf16_v4bf16__7_2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -12951,23 +12951,23 @@ define void @s_shuffle_v3bf16_v4bf16__7_3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -13007,20 +13007,20 @@ define void @s_shuffle_v3bf16_v4bf16__7_5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -13058,19 +13058,19 @@ define void @s_shuffle_v3bf16_v4bf16__7_6_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -13132,21 +13132,21 @@ define void @s_shuffle_v3bf16_v4bf16__0_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -13190,22 +13190,22 @@ define void @s_shuffle_v3bf16_v4bf16__1_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -13247,21 +13247,21 @@ define void @s_shuffle_v3bf16_v4bf16__2_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -13305,22 +13305,22 @@ define void @s_shuffle_v3bf16_v4bf16__3_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -13376,18 +13376,18 @@ define void @s_shuffle_v3bf16_v4bf16__5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -13423,18 +13423,18 @@ define void @s_shuffle_v3bf16_v4bf16__6_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__6_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__6_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -13472,19 +13472,19 @@ define void @s_shuffle_v3bf16_v4bf16__7_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -13520,18 +13520,18 @@ define void @s_shuffle_v3bf16_v4bf16__7_u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -13575,22 +13575,22 @@ define void @s_shuffle_v3bf16_v4bf16__7_0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -13636,23 +13636,23 @@ define void @s_shuffle_v3bf16_v4bf16__7_1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -13696,22 +13696,22 @@ define void @s_shuffle_v3bf16_v4bf16__7_2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -13757,23 +13757,23 @@ define void @s_shuffle_v3bf16_v4bf16__7_3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -13811,19 +13811,19 @@ define void @s_shuffle_v3bf16_v4bf16__7_4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -13861,19 +13861,19 @@ define void @s_shuffle_v3bf16_v4bf16__7_6_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -13933,20 +13933,20 @@ define void @s_shuffle_v3bf16_v4bf16__0_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__0_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__0_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -13988,21 +13988,21 @@ define void @s_shuffle_v3bf16_v4bf16__1_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__1_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__1_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -14042,20 +14042,20 @@ define void @s_shuffle_v3bf16_v4bf16__2_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__2_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__2_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -14097,21 +14097,21 @@ define void @s_shuffle_v3bf16_v4bf16__3_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__3_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__3_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -14167,18 +14167,18 @@ define void @s_shuffle_v3bf16_v4bf16__5_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__5_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__5_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -14234,18 +14234,18 @@ define void @s_shuffle_v3bf16_v4bf16__7_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -14307,21 +14307,21 @@ define void @s_shuffle_v3bf16_v4bf16__7_0_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -14365,22 +14365,22 @@ define void @s_shuffle_v3bf16_v4bf16__7_1_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -14422,21 +14422,21 @@ define void @s_shuffle_v3bf16_v4bf16__7_2_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -14480,22 +14480,22 @@ define void @s_shuffle_v3bf16_v4bf16__7_3_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -14531,18 +14531,18 @@ define void @s_shuffle_v3bf16_v4bf16__7_4_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -14580,19 +14580,19 @@ define void @s_shuffle_v3bf16_v4bf16__7_5_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_lshr_b32 s1, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_lshr_b32 s1, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -14628,18 +14628,18 @@ define void @s_shuffle_v3bf16_v4bf16__u_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__u_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__u_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -14681,21 +14681,21 @@ define void @s_shuffle_v3bf16_v4bf16__0_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__0_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__0_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -14739,22 +14739,22 @@ define void @s_shuffle_v3bf16_v4bf16__1_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__1_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s3, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__1_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s3, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -14796,21 +14796,21 @@ define void @s_shuffle_v3bf16_v4bf16__2_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__2_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__2_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -14854,22 +14854,22 @@ define void @s_shuffle_v3bf16_v4bf16__3_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__3_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s3, 16 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__3_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s3, 16 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -14905,18 +14905,18 @@ define void @s_shuffle_v3bf16_v4bf16__4_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__4_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__4_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -14954,19 +14954,19 @@ define void @s_shuffle_v3bf16_v4bf16__5_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__5_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__5_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -15002,18 +15002,18 @@ define void @s_shuffle_v3bf16_v4bf16__6_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__6_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__6_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -15049,18 +15049,18 @@ define void @s_shuffle_v3bf16_v4bf16__7_u_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -15102,21 +15102,21 @@ define void @s_shuffle_v3bf16_v4bf16__7_0_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -15160,22 +15160,22 @@ define void @s_shuffle_v3bf16_v4bf16__7_1_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s9, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s9, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -15217,21 +15217,21 @@ define void @s_shuffle_v3bf16_v4bf16__7_2_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -15275,22 +15275,22 @@ define void @s_shuffle_v3bf16_v4bf16__7_3_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s9, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s9, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -15326,18 +15326,18 @@ define void @s_shuffle_v3bf16_v4bf16__7_4_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -15375,19 +15375,19 @@ define void @s_shuffle_v3bf16_v4bf16__7_5_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> @@ -15423,18 +15423,18 @@ define void @s_shuffle_v3bf16_v4bf16__7_6_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3bf16_v4bf16__7_6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3bf16_v4bf16__7_6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <3 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3f16.v2f16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3f16.v2f16.ll index 41aa23895726e..e34becc1065ff 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3f16.v2f16.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3f16.v2f16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v3f16_v2f16__u_u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v3f16_v2f16__0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -79,17 +79,17 @@ define void @v_shuffle_v3f16_v2f16__1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v1, s0, v1, 16 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -132,17 +132,17 @@ define void @v_shuffle_v3f16_v2f16__3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v1, s0, v1, 16 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -181,21 +181,21 @@ define void @v_shuffle_v3f16_v2f16__3_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__3_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v2, 16 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__3_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v2, 16 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -236,22 +236,22 @@ define void @v_shuffle_v3f16_v2f16__3_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__3_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v2, s2 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__3_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v2, s2 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -284,17 +284,17 @@ define void @v_shuffle_v3f16_v2f16__3_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__3_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__3_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v1, 16 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -329,18 +329,18 @@ define void @v_shuffle_v3f16_v2f16__3_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__3_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__3_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -383,22 +383,22 @@ define void @v_shuffle_v3f16_v2f16__3_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__3_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v2, v2, v2, s2 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__3_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v2, v2, v2, s2 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -441,22 +441,22 @@ define void @v_shuffle_v3f16_v2f16__3_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__3_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v2, v2, v2, s2 -; GFX940-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__3_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v2, v2, v2, s2 +; GFX942-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -493,19 +493,19 @@ define void @v_shuffle_v3f16_v2f16__3_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__3_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__3_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -544,20 +544,20 @@ define void @v_shuffle_v3f16_v2f16__3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -592,19 +592,19 @@ define void @v_shuffle_v3f16_v2f16__u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -640,19 +640,19 @@ define void @v_shuffle_v3f16_v2f16__0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <3 x i32> zeroinitializer store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -686,19 +686,19 @@ define void @v_shuffle_v3f16_v2f16__1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v1, 16 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v1, 16 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -732,19 +732,19 @@ define void @v_shuffle_v3f16_v2f16__2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -784,22 +784,22 @@ define void @v_shuffle_v3f16_v2f16__3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v2, 16 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v2, 16 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -840,22 +840,22 @@ define void @v_shuffle_v3f16_v2f16__3_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__3_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v2, s0, v2, 16 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__3_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v2, s0, v2, 16 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -898,22 +898,22 @@ define void @v_shuffle_v3f16_v2f16__3_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__3_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v2, v1, v2, s2 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__3_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v2, v1, v2, s2 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -954,22 +954,22 @@ define void @v_shuffle_v3f16_v2f16__3_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__3_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v2, v2, v2, 16 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__3_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v2, v2, v2, 16 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -1004,18 +1004,18 @@ define void @v_shuffle_v3f16_v2f16__u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1049,18 +1049,18 @@ define void @v_shuffle_v3f16_v2f16__0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1098,20 +1098,20 @@ define void @v_shuffle_v3f16_v2f16__1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1145,18 +1145,18 @@ define void @v_shuffle_v3f16_v2f16__2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1200,23 +1200,23 @@ define void @v_shuffle_v3f16_v2f16__3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v2, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v2, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -1259,22 +1259,22 @@ define void @v_shuffle_v3f16_v2f16__3_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__3_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v2, s0, v2, 16 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__3_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v2, s0, v2, 16 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -1317,22 +1317,22 @@ define void @v_shuffle_v3f16_v2f16__3_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__3_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v2, 16 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__3_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v2, 16 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -1373,22 +1373,22 @@ define void @v_shuffle_v3f16_v2f16__3_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__3_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v2, v2, v2, 16 -; GFX940-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__3_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v2, v2, v2, 16 +; GFX942-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -1430,16 +1430,16 @@ define void @v_shuffle_v3f16_v2f16__0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1471,17 +1471,17 @@ define void @v_shuffle_v3f16_v2f16__1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v1, s0, v1, 16 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1526,19 +1526,19 @@ define void @v_shuffle_v3f16_v2f16__3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v1, 16 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v1, 16 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -1573,19 +1573,19 @@ define void @v_shuffle_v3f16_v2f16__3_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__3_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v2, s0, v1, 16 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__3_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v2, s0, v1, 16 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -1626,22 +1626,22 @@ define void @v_shuffle_v3f16_v2f16__3_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__3_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v2, 16 -; GFX940-NEXT: global_store_short v0, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__3_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v2, 16 +; GFX942-NEXT: global_store_short v0, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -1684,22 +1684,22 @@ define void @v_shuffle_v3f16_v2f16__3_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__3_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v1, v2, s2 -; GFX940-NEXT: global_store_short v0, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__3_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v1, v2, s2 +; GFX942-NEXT: global_store_short v0, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -1734,18 +1734,18 @@ define void @v_shuffle_v3f16_v2f16__u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -1790,23 +1790,23 @@ define void @v_shuffle_v3f16_v2f16__0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v2 -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: global_store_short v0, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v1, v2 +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: global_store_short v0, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -1851,24 +1851,24 @@ define void @v_shuffle_v3f16_v2f16__1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v1, s2 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v2 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v1, s2 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -1903,18 +1903,18 @@ define void @v_shuffle_v3f16_v2f16__2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -1951,19 +1951,19 @@ define void @v_shuffle_v3f16_v2f16__3_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__3_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v2, s0, v1, 16 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__3_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v2, s0, v1, 16 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -2006,22 +2006,22 @@ define void @v_shuffle_v3f16_v2f16__3_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__3_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v2, 16 -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: global_store_short v0, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__3_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v2, 16 +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: global_store_short v0, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -2066,24 +2066,24 @@ define void @v_shuffle_v3f16_v2f16__3_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__3_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v2, s2 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v2 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__3_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v2, s2 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -2120,19 +2120,19 @@ define void @v_shuffle_v3f16_v2f16__3_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v2f16__3_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v1, 16 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v2f16__3_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v1, 16 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -2178,17 +2178,17 @@ define void @s_shuffle_v3f16_v2f16__0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v2f16__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v2f16__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <3 x i32> %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> @@ -2221,17 +2221,17 @@ define void @s_shuffle_v3f16_v2f16__1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v2f16__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v2f16__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <3 x i32> %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> @@ -2279,17 +2279,17 @@ define void @s_shuffle_v3f16_v2f16__3_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v2f16__3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v2f16__3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -2331,21 +2331,21 @@ define void @s_shuffle_v3f16_v2f16__3_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v2f16__3_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v2f16__3_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -2389,22 +2389,22 @@ define void @s_shuffle_v3f16_v2f16__3_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v2f16__3_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v2f16__3_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -2440,18 +2440,18 @@ define void @s_shuffle_v3f16_v2f16__3_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v2f16__3_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v2f16__3_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -2487,18 +2487,18 @@ define void @s_shuffle_v3f16_v2f16__3_3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v2f16__3_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v2f16__3_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -2540,21 +2540,21 @@ define void @s_shuffle_v3f16_v2f16__3_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v2f16__3_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v2f16__3_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -2598,22 +2598,22 @@ define void @s_shuffle_v3f16_v2f16__3_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v2f16__3_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v2f16__3_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -2649,18 +2649,18 @@ define void @s_shuffle_v3f16_v2f16__3_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v2f16__3_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v2f16__3_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -2696,18 +2696,18 @@ define void @s_shuffle_v3f16_v2f16__3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v2f16__3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v2f16__3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -2781,18 +2781,18 @@ define void @s_shuffle_v3f16_v2f16__1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v2f16__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v2f16__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <3 x i32> %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> @@ -2852,21 +2852,21 @@ define void @s_shuffle_v3f16_v2f16__3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v2f16__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v2f16__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -2906,20 +2906,20 @@ define void @s_shuffle_v3f16_v2f16__3_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v2f16__3_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v2f16__3_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -2963,22 +2963,22 @@ define void @s_shuffle_v3f16_v2f16__3_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v2f16__3_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s9, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v2f16__3_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s9, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -3020,21 +3020,21 @@ define void @s_shuffle_v3f16_v2f16__3_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v2f16__3_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v2f16__3_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -3108,18 +3108,18 @@ define void @s_shuffle_v3f16_v2f16__1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v2f16__1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v2f16__1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <3 x i32> %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> @@ -3181,22 +3181,22 @@ define void @s_shuffle_v3f16_v2f16__3_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v2f16__3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v2f16__3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -3238,21 +3238,21 @@ define void @s_shuffle_v3f16_v2f16__3_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v2f16__3_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v2f16__3_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -3296,22 +3296,22 @@ define void @s_shuffle_v3f16_v2f16__3_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v2f16__3_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v2f16__3_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -3355,22 +3355,22 @@ define void @s_shuffle_v3f16_v2f16__3_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v2f16__3_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v2f16__3_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -3417,17 +3417,17 @@ define void @s_shuffle_v3f16_v2f16__0_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v2f16__0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v2f16__0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <3 x i32> %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> @@ -3460,17 +3460,17 @@ define void @s_shuffle_v3f16_v2f16__1_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v2f16__1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v2f16__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <3 x i32> %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> @@ -3520,18 +3520,18 @@ define void @s_shuffle_v3f16_v2f16__3_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v2f16__3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v2f16__3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -3593,21 +3593,21 @@ define void @s_shuffle_v3f16_v2f16__3_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v2f16__3_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s9, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v2f16__3_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s9, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -3651,22 +3651,22 @@ define void @s_shuffle_v3f16_v2f16__3_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v2f16__3_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v2f16__3_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -3728,21 +3728,21 @@ define void @s_shuffle_v3f16_v2f16__0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v2f16__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v2f16__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -3786,22 +3786,22 @@ define void @s_shuffle_v3f16_v2f16__1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v2f16__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v2f16__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -3857,18 +3857,18 @@ define void @s_shuffle_v3f16_v2f16__3_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v2f16__3_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v2f16__3_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -3910,21 +3910,21 @@ define void @s_shuffle_v3f16_v2f16__3_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v2f16__3_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v2f16__3_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -3968,22 +3968,22 @@ define void @s_shuffle_v3f16_v2f16__3_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v2f16__3_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v2f16__3_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> @@ -4019,18 +4019,18 @@ define void @s_shuffle_v3f16_v2f16__3_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v2f16__3_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v2f16__3_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <3 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3f16.v3f16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3f16.v3f16.ll index cf60666b8bd9f..84d42c882494c 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3f16.v3f16.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3f16.v3f16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v3f16_v3f16__u_u_u(ptr addrspace(1) inreg %ptr) { @@ -41,17 +41,17 @@ define void @v_shuffle_v3f16_v3f16__0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <3 x i32> @@ -84,17 +84,17 @@ define void @v_shuffle_v3f16_v3f16__1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <3 x i32> @@ -125,16 +125,16 @@ define void @v_shuffle_v3f16_v3f16__2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <3 x i32> @@ -179,17 +179,17 @@ define void @v_shuffle_v3f16_v3f16__4_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -222,16 +222,16 @@ define void @v_shuffle_v3f16_v3f16__5_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -274,22 +274,22 @@ define void @v_shuffle_v3f16_v3f16__5_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -332,22 +332,22 @@ define void @v_shuffle_v3f16_v3f16__5_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -390,22 +390,22 @@ define void @v_shuffle_v3f16_v3f16__5_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -442,18 +442,18 @@ define void @v_shuffle_v3f16_v3f16__5_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -490,18 +490,18 @@ define void @v_shuffle_v3f16_v3f16__5_4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v0 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -538,18 +538,18 @@ define void @v_shuffle_v3f16_v3f16__5_5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -594,23 +594,23 @@ define void @v_shuffle_v3f16_v3f16__5_5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -655,23 +655,23 @@ define void @v_shuffle_v3f16_v3f16__5_5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -716,23 +716,23 @@ define void @v_shuffle_v3f16_v3f16__5_5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -771,19 +771,19 @@ define void @v_shuffle_v3f16_v3f16__5_5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -824,20 +824,20 @@ define void @v_shuffle_v3f16_v3f16__5_5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -876,19 +876,19 @@ define void @v_shuffle_v3f16_v3f16__5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -925,19 +925,19 @@ define void @v_shuffle_v3f16_v3f16__u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <3 x i32> @@ -974,19 +974,19 @@ define void @v_shuffle_v3f16_v3f16__0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <3 x i32> zeroinitializer @@ -1021,19 +1021,19 @@ define void @v_shuffle_v3f16_v3f16__1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v0, 16 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v0, 16 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <3 x i32> @@ -1070,19 +1070,19 @@ define void @v_shuffle_v3f16_v3f16__2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v0, v1, s2 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <3 x i32> @@ -1117,19 +1117,19 @@ define void @v_shuffle_v3f16_v3f16__3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <3 x i32> @@ -1170,22 +1170,22 @@ define void @v_shuffle_v3f16_v3f16__4_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v2, 16 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v2, 16 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -1230,23 +1230,23 @@ define void @v_shuffle_v3f16_v3f16__5_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v3, s2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -1287,20 +1287,20 @@ define void @v_shuffle_v3f16_v3f16__5_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v3, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -1345,23 +1345,23 @@ define void @v_shuffle_v3f16_v3f16__5_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v0 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v3, v0 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -1406,23 +1406,23 @@ define void @v_shuffle_v3f16_v3f16__5_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v3, s2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v3, s2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -1467,23 +1467,23 @@ define void @v_shuffle_v3f16_v3f16__5_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v3, s2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v3, s2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -1528,23 +1528,23 @@ define void @v_shuffle_v3f16_v3f16__5_4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v3, v2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -1581,18 +1581,18 @@ define void @v_shuffle_v3f16_v3f16__u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <3 x i32> @@ -1627,18 +1627,18 @@ define void @v_shuffle_v3f16_v3f16__0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <3 x i32> @@ -1677,20 +1677,20 @@ define void @v_shuffle_v3f16_v3f16__1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <3 x i32> @@ -1729,20 +1729,20 @@ define void @v_shuffle_v3f16_v3f16__2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v0 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v1, v0 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <3 x i32> @@ -1777,18 +1777,18 @@ define void @v_shuffle_v3f16_v3f16__3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <3 x i32> @@ -1833,24 +1833,24 @@ define void @v_shuffle_v3f16_v3f16__4_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v2, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v2, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -1897,24 +1897,24 @@ define void @v_shuffle_v3f16_v3f16__5_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v0 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v3, v0 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -1957,21 +1957,21 @@ define void @v_shuffle_v3f16_v3f16__5_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v4, v3, s[0:1] +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -2018,24 +2018,24 @@ define void @v_shuffle_v3f16_v3f16__5_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v3, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -2082,24 +2082,24 @@ define void @v_shuffle_v3f16_v3f16__5_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v3, s2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v3, s2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -2144,23 +2144,23 @@ define void @v_shuffle_v3f16_v3f16__5_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v3, s2 -; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v3, s2 +; GFX942-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -2205,23 +2205,23 @@ define void @v_shuffle_v3f16_v3f16__5_4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v2 -; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v3, v2 +; GFX942-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -2258,19 +2258,19 @@ define void @v_shuffle_v3f16_v3f16__u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <3 x i32> @@ -2307,19 +2307,19 @@ define void @v_shuffle_v3f16_v3f16__0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <3 x i32> @@ -2354,19 +2354,19 @@ define void @v_shuffle_v3f16_v3f16__1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 16 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <3 x i32> @@ -2403,19 +2403,19 @@ define void @v_shuffle_v3f16_v3f16__2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <3 x i32> @@ -2450,19 +2450,19 @@ define void @v_shuffle_v3f16_v3f16__3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <3 x i32> @@ -2503,22 +2503,22 @@ define void @v_shuffle_v3f16_v3f16__4_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v2, 16 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v2, 16 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -2563,23 +2563,23 @@ define void @v_shuffle_v3f16_v3f16__5_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -2620,20 +2620,20 @@ define void @v_shuffle_v3f16_v3f16__5_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v3, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -2678,23 +2678,23 @@ define void @v_shuffle_v3f16_v3f16__5_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -2739,23 +2739,23 @@ define void @v_shuffle_v3f16_v3f16__5_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -2800,23 +2800,23 @@ define void @v_shuffle_v3f16_v3f16__5_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v3, s2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -2861,23 +2861,23 @@ define void @v_shuffle_v3f16_v3f16__5_4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -2924,17 +2924,17 @@ define void @v_shuffle_v3f16_v3f16__0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <3 x i32> @@ -2967,17 +2967,17 @@ define void @v_shuffle_v3f16_v3f16__1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <3 x i32> @@ -3008,16 +3008,16 @@ define void @v_shuffle_v3f16_v3f16__2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <3 x i32> @@ -3064,19 +3064,19 @@ define void @v_shuffle_v3f16_v3f16__4_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v0, 16 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v0, 16 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -3115,19 +3115,19 @@ define void @v_shuffle_v3f16_v3f16__5_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v0, v1, s2 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -3162,17 +3162,17 @@ define void @v_shuffle_v3f16_v3f16__5_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -3217,22 +3217,22 @@ define void @v_shuffle_v3f16_v3f16__5_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_short v4, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -3277,22 +3277,22 @@ define void @v_shuffle_v3f16_v3f16__5_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: global_store_short v4, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -3337,22 +3337,22 @@ define void @v_shuffle_v3f16_v3f16__5_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_short v4, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -3391,19 +3391,19 @@ define void @v_shuffle_v3f16_v3f16__5_4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_bfi_b32 v1, s2, v1, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -3440,18 +3440,18 @@ define void @v_shuffle_v3f16_v3f16__u_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -3498,24 +3498,24 @@ define void @v_shuffle_v3f16_v3f16__0_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -3562,24 +3562,24 @@ define void @v_shuffle_v3f16_v3f16__1_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -3626,24 +3626,24 @@ define void @v_shuffle_v3f16_v3f16__2_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -3680,18 +3680,18 @@ define void @v_shuffle_v3f16_v3f16__3_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -3732,20 +3732,20 @@ define void @v_shuffle_v3f16_v3f16__4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -3786,20 +3786,20 @@ define void @v_shuffle_v3f16_v3f16__5_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v0 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v1, v0 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -3836,18 +3836,18 @@ define void @v_shuffle_v3f16_v3f16__5_u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -3894,23 +3894,23 @@ define void @v_shuffle_v3f16_v3f16__5_0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v2 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -3957,23 +3957,23 @@ define void @v_shuffle_v3f16_v3f16__5_1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v2 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4020,24 +4020,24 @@ define void @v_shuffle_v3f16_v3f16__5_2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4078,20 +4078,20 @@ define void @v_shuffle_v3f16_v3f16__5_3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v1, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4128,19 +4128,19 @@ define void @v_shuffle_v3f16_v3f16__u_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4185,22 +4185,22 @@ define void @v_shuffle_v3f16_v3f16__0_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4243,22 +4243,22 @@ define void @v_shuffle_v3f16_v3f16__1_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v0, 16 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4303,22 +4303,22 @@ define void @v_shuffle_v3f16_v3f16__2_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4357,19 +4357,19 @@ define void @v_shuffle_v3f16_v3f16__3_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4406,19 +4406,19 @@ define void @v_shuffle_v3f16_v3f16__4_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 16 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4453,17 +4453,17 @@ define void @v_shuffle_v3f16_v3f16__5_u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4508,22 +4508,22 @@ define void @v_shuffle_v3f16_v3f16__5_0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4568,22 +4568,22 @@ define void @v_shuffle_v3f16_v3f16__5_1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4628,22 +4628,22 @@ define void @v_shuffle_v3f16_v3f16__5_2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4682,19 +4682,19 @@ define void @v_shuffle_v3f16_v3f16__5_3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4733,19 +4733,19 @@ define void @v_shuffle_v3f16_v3f16__5_4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v3f16__5_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v3f16__5_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v0 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4794,17 +4794,17 @@ define void @s_shuffle_v3f16_v3f16__0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <3 x i32> @@ -4838,17 +4838,17 @@ define void @s_shuffle_v3f16_v3f16__1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <3 x i32> @@ -4882,17 +4882,17 @@ define void @s_shuffle_v3f16_v3f16__2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <3 x i32> @@ -4942,17 +4942,17 @@ define void @s_shuffle_v3f16_v3f16__4_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4988,17 +4988,17 @@ define void @s_shuffle_v3f16_v3f16__5_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -5040,20 +5040,20 @@ define void @s_shuffle_v3f16_v3f16__5_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -5097,21 +5097,21 @@ define void @s_shuffle_v3f16_v3f16__5_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -5153,20 +5153,20 @@ define void @s_shuffle_v3f16_v3f16__5_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -5202,17 +5202,17 @@ define void @s_shuffle_v3f16_v3f16__5_3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -5250,18 +5250,18 @@ define void @s_shuffle_v3f16_v3f16__5_4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -5297,17 +5297,17 @@ define void @s_shuffle_v3f16_v3f16__5_5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -5351,21 +5351,21 @@ define void @s_shuffle_v3f16_v3f16__5_5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -5409,21 +5409,21 @@ define void @s_shuffle_v3f16_v3f16__5_5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -5465,20 +5465,20 @@ define void @s_shuffle_v3f16_v3f16__5_5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -5516,18 +5516,18 @@ define void @s_shuffle_v3f16_v3f16__5_5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -5565,18 +5565,18 @@ define void @s_shuffle_v3f16_v3f16__5_5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -5636,18 +5636,18 @@ define void @s_shuffle_v3f16_v3f16__u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <3 x i32> @@ -5683,18 +5683,18 @@ define void @s_shuffle_v3f16_v3f16__0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <3 x i32> zeroinitializer @@ -5732,19 +5732,19 @@ define void @s_shuffle_v3f16_v3f16__1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <3 x i32> @@ -5780,18 +5780,18 @@ define void @s_shuffle_v3f16_v3f16__2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <3 x i32> @@ -5827,18 +5827,18 @@ define void @s_shuffle_v3f16_v3f16__3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <3 x i32> @@ -5882,22 +5882,22 @@ define void @s_shuffle_v3f16_v3f16__4_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -5941,21 +5941,21 @@ define void @s_shuffle_v3f16_v3f16__5_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -5999,21 +5999,21 @@ define void @s_shuffle_v3f16_v3f16__5_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -6059,22 +6059,22 @@ define void @s_shuffle_v3f16_v3f16__5_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -6118,21 +6118,21 @@ define void @s_shuffle_v3f16_v3f16__5_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -6176,21 +6176,21 @@ define void @s_shuffle_v3f16_v3f16__5_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -6236,22 +6236,22 @@ define void @s_shuffle_v3f16_v3f16__5_4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -6329,18 +6329,18 @@ define void @s_shuffle_v3f16_v3f16__1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <3 x i32> @@ -6376,18 +6376,18 @@ define void @s_shuffle_v3f16_v3f16__2_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <3 x i32> @@ -6451,22 +6451,22 @@ define void @s_shuffle_v3f16_v3f16__4_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -6510,21 +6510,21 @@ define void @s_shuffle_v3f16_v3f16__5_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -6568,21 +6568,21 @@ define void @s_shuffle_v3f16_v3f16__5_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -6626,21 +6626,21 @@ define void @s_shuffle_v3f16_v3f16__5_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -6684,21 +6684,21 @@ define void @s_shuffle_v3f16_v3f16__5_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -6742,21 +6742,21 @@ define void @s_shuffle_v3f16_v3f16__5_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s2 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s2 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -6802,22 +6802,22 @@ define void @s_shuffle_v3f16_v3f16__5_4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -6895,18 +6895,18 @@ define void @s_shuffle_v3f16_v3f16__1_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <3 x i32> @@ -6988,21 +6988,21 @@ define void @s_shuffle_v3f16_v3f16__4_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -7044,20 +7044,20 @@ define void @s_shuffle_v3f16_v3f16__5_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -7099,20 +7099,20 @@ define void @s_shuffle_v3f16_v3f16__5_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -7154,20 +7154,20 @@ define void @s_shuffle_v3f16_v3f16__5_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -7211,21 +7211,21 @@ define void @s_shuffle_v3f16_v3f16__5_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -7267,20 +7267,20 @@ define void @s_shuffle_v3f16_v3f16__5_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -7324,21 +7324,21 @@ define void @s_shuffle_v3f16_v3f16__5_4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -7388,17 +7388,17 @@ define void @s_shuffle_v3f16_v3f16__0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <3 x i32> @@ -7432,17 +7432,17 @@ define void @s_shuffle_v3f16_v3f16__1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <3 x i32> @@ -7476,17 +7476,17 @@ define void @s_shuffle_v3f16_v3f16__2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <3 x i32> @@ -7540,19 +7540,19 @@ define void @s_shuffle_v3f16_v3f16__4_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -7590,18 +7590,18 @@ define void @s_shuffle_v3f16_v3f16__5_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -7639,18 +7639,18 @@ define void @s_shuffle_v3f16_v3f16__5_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -7694,21 +7694,21 @@ define void @s_shuffle_v3f16_v3f16__5_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -7754,22 +7754,22 @@ define void @s_shuffle_v3f16_v3f16__5_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -7813,21 +7813,21 @@ define void @s_shuffle_v3f16_v3f16__5_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -7867,19 +7867,19 @@ define void @s_shuffle_v3f16_v3f16__5_4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -7945,21 +7945,21 @@ define void @s_shuffle_v3f16_v3f16__0_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -8005,22 +8005,22 @@ define void @s_shuffle_v3f16_v3f16__1_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -8064,21 +8064,21 @@ define void @s_shuffle_v3f16_v3f16__2_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -8138,18 +8138,18 @@ define void @s_shuffle_v3f16_v3f16__4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -8187,18 +8187,18 @@ define void @s_shuffle_v3f16_v3f16__5_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -8236,18 +8236,18 @@ define void @s_shuffle_v3f16_v3f16__5_u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -8291,21 +8291,21 @@ define void @s_shuffle_v3f16_v3f16__5_0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -8351,22 +8351,22 @@ define void @s_shuffle_v3f16_v3f16__5_1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -8410,21 +8410,21 @@ define void @s_shuffle_v3f16_v3f16__5_2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -8462,18 +8462,18 @@ define void @s_shuffle_v3f16_v3f16__5_3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -8537,20 +8537,20 @@ define void @s_shuffle_v3f16_v3f16__0_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -8594,21 +8594,21 @@ define void @s_shuffle_v3f16_v3f16__1_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -8650,20 +8650,20 @@ define void @s_shuffle_v3f16_v3f16__2_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -8723,18 +8723,18 @@ define void @s_shuffle_v3f16_v3f16__4_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -8798,20 +8798,20 @@ define void @s_shuffle_v3f16_v3f16__5_0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -8855,21 +8855,21 @@ define void @s_shuffle_v3f16_v3f16__5_1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -8911,20 +8911,20 @@ define void @s_shuffle_v3f16_v3f16__5_2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -8984,18 +8984,18 @@ define void @s_shuffle_v3f16_v3f16__5_4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v3f16__5_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v3f16__5_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3f16.v4f16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3f16.v4f16.ll index 0ed55847912f3..ecc7ff618932b 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3f16.v4f16.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3f16.v4f16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v3f16_v4f16__u_u_u(ptr addrspace(1) inreg %ptr) { @@ -40,17 +40,17 @@ define void @v_shuffle_v3f16_v4f16__0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -82,17 +82,17 @@ define void @v_shuffle_v3f16_v4f16__1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -122,16 +122,16 @@ define void @v_shuffle_v3f16_v4f16__2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -163,17 +163,17 @@ define void @v_shuffle_v3f16_v4f16__3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -216,17 +216,17 @@ define void @v_shuffle_v3f16_v4f16__5_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -257,16 +257,16 @@ define void @v_shuffle_v3f16_v4f16__6_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__6_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__6_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -299,17 +299,17 @@ define void @v_shuffle_v3f16_v4f16__7_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -348,21 +348,21 @@ define void @v_shuffle_v3f16_v4f16__7_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -403,22 +403,22 @@ define void @v_shuffle_v3f16_v4f16__7_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -457,21 +457,21 @@ define void @v_shuffle_v3f16_v4f16__7_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -512,22 +512,22 @@ define void @v_shuffle_v3f16_v4f16__7_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -560,17 +560,17 @@ define void @v_shuffle_v3f16_v4f16__7_4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v1, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -605,18 +605,18 @@ define void @v_shuffle_v3f16_v4f16__7_5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -649,17 +649,17 @@ define void @v_shuffle_v3f16_v4f16__7_6_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v1, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -694,18 +694,18 @@ define void @v_shuffle_v3f16_v4f16__7_7_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -748,23 +748,23 @@ define void @v_shuffle_v3f16_v4f16__7_7_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -807,23 +807,23 @@ define void @v_shuffle_v3f16_v4f16__7_7_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -866,23 +866,23 @@ define void @v_shuffle_v3f16_v4f16__7_7_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -925,23 +925,23 @@ define void @v_shuffle_v3f16_v4f16__7_7_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -978,19 +978,19 @@ define void @v_shuffle_v3f16_v4f16__7_7_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -1027,19 +1027,19 @@ define void @v_shuffle_v3f16_v4f16__7_7_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -1076,19 +1076,19 @@ define void @v_shuffle_v3f16_v4f16__7_7_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -1127,20 +1127,20 @@ define void @v_shuffle_v3f16_v4f16__7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -1175,19 +1175,19 @@ define void @v_shuffle_v3f16_v4f16__u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1223,19 +1223,19 @@ define void @v_shuffle_v3f16_v4f16__0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> zeroinitializer store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1269,19 +1269,19 @@ define void @v_shuffle_v3f16_v4f16__1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v0, 16 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v0, 16 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1317,19 +1317,19 @@ define void @v_shuffle_v3f16_v4f16__2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v0, v1, s2 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1363,19 +1363,19 @@ define void @v_shuffle_v3f16_v4f16__3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v1, 16 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v1, 16 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1409,19 +1409,19 @@ define void @v_shuffle_v3f16_v4f16__4_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1461,22 +1461,22 @@ define void @v_shuffle_v3f16_v4f16__5_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v2, 16 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v2, 16 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -1519,23 +1519,23 @@ define void @v_shuffle_v3f16_v4f16__6_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__6_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__6_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v3, s2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -1576,22 +1576,22 @@ define void @v_shuffle_v3f16_v4f16__7_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v3, 16 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v3, 16 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -1632,22 +1632,22 @@ define void @v_shuffle_v3f16_v4f16__7_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, s0, v3, 16 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, s0, v3, 16 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -1690,23 +1690,23 @@ define void @v_shuffle_v3f16_v4f16__7_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v3, s2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -1747,22 +1747,22 @@ define void @v_shuffle_v3f16_v4f16__7_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v3, 16 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v3, 16 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -1805,23 +1805,23 @@ define void @v_shuffle_v3f16_v4f16__7_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v3, s2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v3, s2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -1862,22 +1862,22 @@ define void @v_shuffle_v3f16_v4f16__7_4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v2, v3, 16 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v2, v3, 16 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -1920,23 +1920,23 @@ define void @v_shuffle_v3f16_v4f16__7_5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v3, s2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v3, s2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -1977,22 +1977,22 @@ define void @v_shuffle_v3f16_v4f16__7_6_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v3, v3, 16 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v3, v3, 16 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -2027,18 +2027,18 @@ define void @v_shuffle_v3f16_v4f16__u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2072,18 +2072,18 @@ define void @v_shuffle_v3f16_v4f16__0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2121,20 +2121,20 @@ define void @v_shuffle_v3f16_v4f16__1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2172,20 +2172,20 @@ define void @v_shuffle_v3f16_v4f16__2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v0 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v1, v0 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2223,20 +2223,20 @@ define void @v_shuffle_v3f16_v4f16__3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v1, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2270,18 +2270,18 @@ define void @v_shuffle_v3f16_v4f16__4_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2325,24 +2325,24 @@ define void @v_shuffle_v3f16_v4f16__5_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v2, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v2, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -2387,24 +2387,24 @@ define void @v_shuffle_v3f16_v4f16__6_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__6_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v0 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__6_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v3, v0 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -2449,24 +2449,24 @@ define void @v_shuffle_v3f16_v4f16__7_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v3, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -2507,22 +2507,22 @@ define void @v_shuffle_v3f16_v4f16__7_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, s0, v3, 16 -; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, s0, v3, 16 +; GFX942-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -2563,22 +2563,22 @@ define void @v_shuffle_v3f16_v4f16__7_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v3, 16 -; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v3, 16 +; GFX942-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -2619,22 +2619,22 @@ define void @v_shuffle_v3f16_v4f16__7_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v3, 16 -; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v3, 16 +; GFX942-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -2677,23 +2677,23 @@ define void @v_shuffle_v3f16_v4f16__7_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v3, s2 -; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v3, s2 +; GFX942-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -2734,22 +2734,22 @@ define void @v_shuffle_v3f16_v4f16__7_4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v2, v3, 16 -; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v2, v3, 16 +; GFX942-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -2792,23 +2792,23 @@ define void @v_shuffle_v3f16_v4f16__7_5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v3, s2 -; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v3, s2 +; GFX942-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -2849,22 +2849,22 @@ define void @v_shuffle_v3f16_v4f16__7_6_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v3, v3, 16 -; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v3, v3, 16 +; GFX942-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -2899,19 +2899,19 @@ define void @v_shuffle_v3f16_v4f16__u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2947,19 +2947,19 @@ define void @v_shuffle_v3f16_v4f16__0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2993,19 +2993,19 @@ define void @v_shuffle_v3f16_v4f16__1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 16 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3041,19 +3041,19 @@ define void @v_shuffle_v3f16_v4f16__2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3087,19 +3087,19 @@ define void @v_shuffle_v3f16_v4f16__3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v1, 16 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3133,19 +3133,19 @@ define void @v_shuffle_v3f16_v4f16__4_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3185,22 +3185,22 @@ define void @v_shuffle_v3f16_v4f16__5_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v2, 16 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v2, 16 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -3243,23 +3243,23 @@ define void @v_shuffle_v3f16_v4f16__6_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__6_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__6_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -3300,22 +3300,22 @@ define void @v_shuffle_v3f16_v4f16__7_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v3, 16 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -3356,22 +3356,22 @@ define void @v_shuffle_v3f16_v4f16__7_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v3, 16 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -3412,22 +3412,22 @@ define void @v_shuffle_v3f16_v4f16__7_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -3470,23 +3470,23 @@ define void @v_shuffle_v3f16_v4f16__7_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -3529,23 +3529,23 @@ define void @v_shuffle_v3f16_v4f16__7_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -3586,22 +3586,22 @@ define void @v_shuffle_v3f16_v4f16__7_4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v3, 16 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v3, 16 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -3644,23 +3644,23 @@ define void @v_shuffle_v3f16_v4f16__7_5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v3, s2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -3701,22 +3701,22 @@ define void @v_shuffle_v3f16_v4f16__7_6_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v3, 16 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -3755,20 +3755,20 @@ define void @v_shuffle_v3f16_v4f16__u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v1 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3806,20 +3806,20 @@ define void @v_shuffle_v3f16_v4f16__0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v1 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3857,20 +3857,20 @@ define void @v_shuffle_v3f16_v4f16__1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3902,17 +3902,17 @@ define void @v_shuffle_v3f16_v4f16__2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short_d16_hi v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short_d16_hi v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3950,20 +3950,20 @@ define void @v_shuffle_v3f16_v4f16__3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4001,20 +4001,20 @@ define void @v_shuffle_v3f16_v4f16__4_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v1 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4058,24 +4058,24 @@ define void @v_shuffle_v3f16_v4f16__5_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v2, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v2, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -4120,24 +4120,24 @@ define void @v_shuffle_v3f16_v4f16__6_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__6_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v1 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__6_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v1 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -4182,24 +4182,24 @@ define void @v_shuffle_v3f16_v4f16__7_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -4240,22 +4240,22 @@ define void @v_shuffle_v3f16_v4f16__7_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16 -; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v3, 16 +; GFX942-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -4296,22 +4296,22 @@ define void @v_shuffle_v3f16_v4f16__7_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -4354,23 +4354,23 @@ define void @v_shuffle_v3f16_v4f16__7_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -4411,22 +4411,22 @@ define void @v_shuffle_v3f16_v4f16__7_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 -; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v3, 16 +; GFX942-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -4467,22 +4467,22 @@ define void @v_shuffle_v3f16_v4f16__7_4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v3, 16 -; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v3, 16 +; GFX942-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -4525,23 +4525,23 @@ define void @v_shuffle_v3f16_v4f16__7_5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 -; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v3, s2 +; GFX942-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -4582,22 +4582,22 @@ define void @v_shuffle_v3f16_v4f16__7_6_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 -; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v3, 16 +; GFX942-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -4641,17 +4641,17 @@ define void @v_shuffle_v3f16_v4f16__0_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4683,17 +4683,17 @@ define void @v_shuffle_v3f16_v4f16__1_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4723,16 +4723,16 @@ define void @v_shuffle_v3f16_v4f16__2_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4764,17 +4764,17 @@ define void @v_shuffle_v3f16_v4f16__3_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4819,19 +4819,19 @@ define void @v_shuffle_v3f16_v4f16__5_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v0, 16 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v0, 16 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -4868,19 +4868,19 @@ define void @v_shuffle_v3f16_v4f16__6_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__6_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__6_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v0, v1, s2 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -4915,19 +4915,19 @@ define void @v_shuffle_v3f16_v4f16__7_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v1, 16 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v1, 16 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -4962,19 +4962,19 @@ define void @v_shuffle_v3f16_v4f16__7_u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, s0, v1, 16 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -5015,22 +5015,22 @@ define void @v_shuffle_v3f16_v4f16__7_0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_short v4, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -5073,22 +5073,22 @@ define void @v_shuffle_v3f16_v4f16__7_1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_short v4, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -5129,22 +5129,22 @@ define void @v_shuffle_v3f16_v4f16__7_2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 -; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v3, 16 +; GFX942-NEXT: global_store_short v4, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -5187,22 +5187,22 @@ define void @v_shuffle_v3f16_v4f16__7_3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_short v4, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -5239,19 +5239,19 @@ define void @v_shuffle_v3f16_v4f16__7_5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v0, v1, s2 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -5286,19 +5286,19 @@ define void @v_shuffle_v3f16_v4f16__7_6_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v1, 16 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -5333,18 +5333,18 @@ define void @v_shuffle_v3f16_v4f16__u_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -5389,24 +5389,24 @@ define void @v_shuffle_v3f16_v4f16__0_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -5451,24 +5451,24 @@ define void @v_shuffle_v3f16_v4f16__1_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -5513,24 +5513,24 @@ define void @v_shuffle_v3f16_v4f16__2_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -5575,24 +5575,24 @@ define void @v_shuffle_v3f16_v4f16__3_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v1, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v1, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -5627,18 +5627,18 @@ define void @v_shuffle_v3f16_v4f16__4_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -5677,20 +5677,20 @@ define void @v_shuffle_v3f16_v4f16__5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -5729,20 +5729,20 @@ define void @v_shuffle_v3f16_v4f16__6_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__6_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v0 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__6_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v1, v0 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -5781,20 +5781,20 @@ define void @v_shuffle_v3f16_v4f16__7_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v1, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -5829,19 +5829,19 @@ define void @v_shuffle_v3f16_v4f16__7_u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16 -; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, s0, v1, 16 +; GFX942-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -5882,22 +5882,22 @@ define void @v_shuffle_v3f16_v4f16__7_0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -5940,22 +5940,22 @@ define void @v_shuffle_v3f16_v4f16__7_1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -5996,22 +5996,22 @@ define void @v_shuffle_v3f16_v4f16__7_2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 -; GFX940-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v3, 16 +; GFX942-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -6054,22 +6054,22 @@ define void @v_shuffle_v3f16_v4f16__7_3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -6104,19 +6104,19 @@ define void @v_shuffle_v3f16_v4f16__7_4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v1, 16 -; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v1, 16 +; GFX942-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -6151,19 +6151,19 @@ define void @v_shuffle_v3f16_v4f16__7_6_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16 -; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v1, 16 +; GFX942-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -6198,19 +6198,19 @@ define void @v_shuffle_v3f16_v4f16__u_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__u_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__u_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -6253,22 +6253,22 @@ define void @v_shuffle_v3f16_v4f16__0_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__0_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__0_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -6309,22 +6309,22 @@ define void @v_shuffle_v3f16_v4f16__1_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__1_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__1_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v0, 16 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -6367,22 +6367,22 @@ define void @v_shuffle_v3f16_v4f16__2_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__2_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__2_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -6423,22 +6423,22 @@ define void @v_shuffle_v3f16_v4f16__3_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__3_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v1, 16 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__3_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v1, 16 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -6475,19 +6475,19 @@ define void @v_shuffle_v3f16_v4f16__4_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__4_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__4_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -6522,19 +6522,19 @@ define void @v_shuffle_v3f16_v4f16__5_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__5_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__5_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 16 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -6571,19 +6571,19 @@ define void @v_shuffle_v3f16_v4f16__6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -6618,19 +6618,19 @@ define void @v_shuffle_v3f16_v4f16__7_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v1, 16 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -6665,19 +6665,19 @@ define void @v_shuffle_v3f16_v4f16__7_u_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -6718,22 +6718,22 @@ define void @v_shuffle_v3f16_v4f16__7_0_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -6776,22 +6776,22 @@ define void @v_shuffle_v3f16_v4f16__7_1_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -6832,22 +6832,22 @@ define void @v_shuffle_v3f16_v4f16__7_2_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v3, 16 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -6890,22 +6890,22 @@ define void @v_shuffle_v3f16_v4f16__7_3_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -6940,19 +6940,19 @@ define void @v_shuffle_v3f16_v4f16__7_4_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v1, 16 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -6989,19 +6989,19 @@ define void @v_shuffle_v3f16_v4f16__7_5_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -7040,20 +7040,20 @@ define void @v_shuffle_v3f16_v4f16__u_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__u_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__u_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v1 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -7098,24 +7098,24 @@ define void @v_shuffle_v3f16_v4f16__0_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__0_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v3 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__0_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v3 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -7160,24 +7160,24 @@ define void @v_shuffle_v3f16_v4f16__1_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__1_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__1_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -7222,24 +7222,24 @@ define void @v_shuffle_v3f16_v4f16__2_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__2_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v3 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__2_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v3 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v3 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -7284,24 +7284,24 @@ define void @v_shuffle_v3f16_v4f16__3_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__3_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__3_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -7340,20 +7340,20 @@ define void @v_shuffle_v3f16_v4f16__4_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__4_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__4_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v1 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -7392,20 +7392,20 @@ define void @v_shuffle_v3f16_v4f16__5_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__5_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__5_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -7438,17 +7438,17 @@ define void @v_shuffle_v3f16_v4f16__6_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__6_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short_d16_hi v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__6_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short_d16_hi v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -7485,19 +7485,19 @@ define void @v_shuffle_v3f16_v4f16__7_u_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: v_alignbit_b32 v1, s0, v1, 16 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -7540,23 +7540,23 @@ define void @v_shuffle_v3f16_v4f16__7_0_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -7601,24 +7601,24 @@ define void @v_shuffle_v3f16_v4f16__7_1_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -7661,23 +7661,23 @@ define void @v_shuffle_v3f16_v4f16__7_2_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v3, 16 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v3 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -7722,24 +7722,24 @@ define void @v_shuffle_v3f16_v4f16__7_3_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -7776,19 +7776,19 @@ define void @v_shuffle_v3f16_v4f16__7_4_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16 -; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v1, 16 +; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: global_store_short v2, v3, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -7827,20 +7827,20 @@ define void @v_shuffle_v3f16_v4f16__7_5_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -7877,19 +7877,19 @@ define void @v_shuffle_v3f16_v4f16__7_6_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f16_v4f16__7_6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v1, 16 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -7935,17 +7935,17 @@ define void @s_shuffle_v3f16_v4f16__0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> @@ -7978,17 +7978,17 @@ define void @s_shuffle_v3f16_v4f16__1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> @@ -8021,17 +8021,17 @@ define void @s_shuffle_v3f16_v4f16__2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> @@ -8064,17 +8064,17 @@ define void @s_shuffle_v3f16_v4f16__3_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> @@ -8122,17 +8122,17 @@ define void @s_shuffle_v3f16_v4f16__5_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -8166,17 +8166,17 @@ define void @s_shuffle_v3f16_v4f16__6_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__6_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__6_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -8210,17 +8210,17 @@ define void @s_shuffle_v3f16_v4f16__7_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -8262,21 +8262,21 @@ define void @s_shuffle_v3f16_v4f16__7_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -8320,22 +8320,22 @@ define void @s_shuffle_v3f16_v4f16__7_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -8377,21 +8377,21 @@ define void @s_shuffle_v3f16_v4f16__7_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -8435,22 +8435,22 @@ define void @s_shuffle_v3f16_v4f16__7_3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -8486,18 +8486,18 @@ define void @s_shuffle_v3f16_v4f16__7_4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -8535,19 +8535,19 @@ define void @s_shuffle_v3f16_v4f16__7_5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -8583,18 +8583,18 @@ define void @s_shuffle_v3f16_v4f16__7_6_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -8630,18 +8630,18 @@ define void @s_shuffle_v3f16_v4f16__7_7_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -8685,22 +8685,22 @@ define void @s_shuffle_v3f16_v4f16__7_7_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -8744,22 +8744,22 @@ define void @s_shuffle_v3f16_v4f16__7_7_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -8801,21 +8801,21 @@ define void @s_shuffle_v3f16_v4f16__7_7_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -8859,22 +8859,22 @@ define void @s_shuffle_v3f16_v4f16__7_7_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -8912,19 +8912,19 @@ define void @s_shuffle_v3f16_v4f16__7_7_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -8962,19 +8962,19 @@ define void @s_shuffle_v3f16_v4f16__7_7_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -9010,18 +9010,18 @@ define void @s_shuffle_v3f16_v4f16__7_7_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -9057,18 +9057,18 @@ define void @s_shuffle_v3f16_v4f16__7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -9104,18 +9104,18 @@ define void @s_shuffle_v3f16_v4f16__u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> @@ -9150,18 +9150,18 @@ define void @s_shuffle_v3f16_v4f16__0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> zeroinitializer %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> @@ -9198,19 +9198,19 @@ define void @s_shuffle_v3f16_v4f16__1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> @@ -9245,18 +9245,18 @@ define void @s_shuffle_v3f16_v4f16__2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> @@ -9293,19 +9293,19 @@ define void @s_shuffle_v3f16_v4f16__3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> @@ -9340,18 +9340,18 @@ define void @s_shuffle_v3f16_v4f16__4_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> @@ -9394,22 +9394,22 @@ define void @s_shuffle_v3f16_v4f16__5_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -9451,21 +9451,21 @@ define void @s_shuffle_v3f16_v4f16__6_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__6_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__6_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -9509,22 +9509,22 @@ define void @s_shuffle_v3f16_v4f16__7_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -9566,21 +9566,21 @@ define void @s_shuffle_v3f16_v4f16__7_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s3, 16 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s3, 16 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -9626,23 +9626,23 @@ define void @s_shuffle_v3f16_v4f16__7_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -9686,22 +9686,22 @@ define void @s_shuffle_v3f16_v4f16__7_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -9747,23 +9747,23 @@ define void @s_shuffle_v3f16_v4f16__7_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -9807,22 +9807,22 @@ define void @s_shuffle_v3f16_v4f16__7_4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -9868,23 +9868,23 @@ define void @s_shuffle_v3f16_v4f16__7_5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -9928,22 +9928,22 @@ define void @s_shuffle_v3f16_v4f16__7_6_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -10017,18 +10017,18 @@ define void @s_shuffle_v3f16_v4f16__1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> @@ -10063,18 +10063,18 @@ define void @s_shuffle_v3f16_v4f16__2_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> @@ -10111,19 +10111,19 @@ define void @s_shuffle_v3f16_v4f16__3_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> @@ -10185,22 +10185,22 @@ define void @s_shuffle_v3f16_v4f16__5_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -10242,21 +10242,21 @@ define void @s_shuffle_v3f16_v4f16__6_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__6_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__6_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -10300,22 +10300,22 @@ define void @s_shuffle_v3f16_v4f16__7_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -10357,21 +10357,21 @@ define void @s_shuffle_v3f16_v4f16__7_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_lshr_b32 s8, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_lshr_b32 s8, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -10415,22 +10415,22 @@ define void @s_shuffle_v3f16_v4f16__7_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -10474,22 +10474,22 @@ define void @s_shuffle_v3f16_v4f16__7_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -10535,23 +10535,23 @@ define void @s_shuffle_v3f16_v4f16__7_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -10595,22 +10595,22 @@ define void @s_shuffle_v3f16_v4f16__7_4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s2 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -10656,23 +10656,23 @@ define void @s_shuffle_v3f16_v4f16__7_5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -10716,22 +10716,22 @@ define void @s_shuffle_v3f16_v4f16__7_6_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s3 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -10805,18 +10805,18 @@ define void @s_shuffle_v3f16_v4f16__1_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> @@ -10870,18 +10870,18 @@ define void @s_shuffle_v3f16_v4f16__3_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> @@ -10941,21 +10941,21 @@ define void @s_shuffle_v3f16_v4f16__5_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -10995,20 +10995,20 @@ define void @s_shuffle_v3f16_v4f16__6_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__6_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__6_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -11050,21 +11050,21 @@ define void @s_shuffle_v3f16_v4f16__7_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -11104,20 +11104,20 @@ define void @s_shuffle_v3f16_v4f16__7_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -11159,21 +11159,21 @@ define void @s_shuffle_v3f16_v4f16__7_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -11217,22 +11217,22 @@ define void @s_shuffle_v3f16_v4f16__7_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -11276,22 +11276,22 @@ define void @s_shuffle_v3f16_v4f16__7_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -11333,21 +11333,21 @@ define void @s_shuffle_v3f16_v4f16__7_4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -11391,22 +11391,22 @@ define void @s_shuffle_v3f16_v4f16__7_5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -11448,21 +11448,21 @@ define void @s_shuffle_v3f16_v4f16__7_6_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -11498,18 +11498,18 @@ define void @s_shuffle_v3f16_v4f16__u_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> @@ -11544,18 +11544,18 @@ define void @s_shuffle_v3f16_v4f16__0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> @@ -11592,19 +11592,19 @@ define void @s_shuffle_v3f16_v4f16__1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> @@ -11639,18 +11639,18 @@ define void @s_shuffle_v3f16_v4f16__2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> @@ -11685,18 +11685,18 @@ define void @s_shuffle_v3f16_v4f16__3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> @@ -11731,18 +11731,18 @@ define void @s_shuffle_v3f16_v4f16__4_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> @@ -11785,22 +11785,22 @@ define void @s_shuffle_v3f16_v4f16__5_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -11842,21 +11842,21 @@ define void @s_shuffle_v3f16_v4f16__6_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__6_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__6_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -11900,22 +11900,22 @@ define void @s_shuffle_v3f16_v4f16__7_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -11957,21 +11957,21 @@ define void @s_shuffle_v3f16_v4f16__7_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_lshr_b32 s8, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_lshr_b32 s8, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -12015,22 +12015,22 @@ define void @s_shuffle_v3f16_v4f16__7_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -12076,23 +12076,23 @@ define void @s_shuffle_v3f16_v4f16__7_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -12136,22 +12136,22 @@ define void @s_shuffle_v3f16_v4f16__7_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -12195,22 +12195,22 @@ define void @s_shuffle_v3f16_v4f16__7_4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -12256,23 +12256,23 @@ define void @s_shuffle_v3f16_v4f16__7_5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -12316,22 +12316,22 @@ define void @s_shuffle_v3f16_v4f16__7_6_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -12378,17 +12378,17 @@ define void @s_shuffle_v3f16_v4f16__0_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> @@ -12421,17 +12421,17 @@ define void @s_shuffle_v3f16_v4f16__1_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> @@ -12464,17 +12464,17 @@ define void @s_shuffle_v3f16_v4f16__2_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> @@ -12507,17 +12507,17 @@ define void @s_shuffle_v3f16_v4f16__3_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> @@ -12569,19 +12569,19 @@ define void @s_shuffle_v3f16_v4f16__5_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -12617,18 +12617,18 @@ define void @s_shuffle_v3f16_v4f16__6_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__6_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__6_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -12666,19 +12666,19 @@ define void @s_shuffle_v3f16_v4f16__7_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -12714,18 +12714,18 @@ define void @s_shuffle_v3f16_v4f16__7_u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -12769,22 +12769,22 @@ define void @s_shuffle_v3f16_v4f16__7_0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -12830,23 +12830,23 @@ define void @s_shuffle_v3f16_v4f16__7_1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -12890,22 +12890,22 @@ define void @s_shuffle_v3f16_v4f16__7_2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -12951,23 +12951,23 @@ define void @s_shuffle_v3f16_v4f16__7_3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -13007,20 +13007,20 @@ define void @s_shuffle_v3f16_v4f16__7_5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -13058,19 +13058,19 @@ define void @s_shuffle_v3f16_v4f16__7_6_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -13132,21 +13132,21 @@ define void @s_shuffle_v3f16_v4f16__0_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -13190,22 +13190,22 @@ define void @s_shuffle_v3f16_v4f16__1_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -13247,21 +13247,21 @@ define void @s_shuffle_v3f16_v4f16__2_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -13305,22 +13305,22 @@ define void @s_shuffle_v3f16_v4f16__3_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -13376,18 +13376,18 @@ define void @s_shuffle_v3f16_v4f16__5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -13423,18 +13423,18 @@ define void @s_shuffle_v3f16_v4f16__6_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__6_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__6_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -13472,19 +13472,19 @@ define void @s_shuffle_v3f16_v4f16__7_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -13520,18 +13520,18 @@ define void @s_shuffle_v3f16_v4f16__7_u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -13575,22 +13575,22 @@ define void @s_shuffle_v3f16_v4f16__7_0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -13636,23 +13636,23 @@ define void @s_shuffle_v3f16_v4f16__7_1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -13696,22 +13696,22 @@ define void @s_shuffle_v3f16_v4f16__7_2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -13757,23 +13757,23 @@ define void @s_shuffle_v3f16_v4f16__7_3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -13811,19 +13811,19 @@ define void @s_shuffle_v3f16_v4f16__7_4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -13861,19 +13861,19 @@ define void @s_shuffle_v3f16_v4f16__7_6_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -13933,20 +13933,20 @@ define void @s_shuffle_v3f16_v4f16__0_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__0_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__0_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -13988,21 +13988,21 @@ define void @s_shuffle_v3f16_v4f16__1_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__1_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__1_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -14042,20 +14042,20 @@ define void @s_shuffle_v3f16_v4f16__2_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__2_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__2_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -14097,21 +14097,21 @@ define void @s_shuffle_v3f16_v4f16__3_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__3_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__3_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -14167,18 +14167,18 @@ define void @s_shuffle_v3f16_v4f16__5_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__5_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__5_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -14234,18 +14234,18 @@ define void @s_shuffle_v3f16_v4f16__7_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -14307,21 +14307,21 @@ define void @s_shuffle_v3f16_v4f16__7_0_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -14365,22 +14365,22 @@ define void @s_shuffle_v3f16_v4f16__7_1_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -14422,21 +14422,21 @@ define void @s_shuffle_v3f16_v4f16__7_2_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -14480,22 +14480,22 @@ define void @s_shuffle_v3f16_v4f16__7_3_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -14531,18 +14531,18 @@ define void @s_shuffle_v3f16_v4f16__7_4_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -14580,19 +14580,19 @@ define void @s_shuffle_v3f16_v4f16__7_5_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_lshr_b32 s1, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_lshr_b32 s1, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -14628,18 +14628,18 @@ define void @s_shuffle_v3f16_v4f16__u_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__u_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__u_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -14681,21 +14681,21 @@ define void @s_shuffle_v3f16_v4f16__0_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__0_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__0_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -14739,22 +14739,22 @@ define void @s_shuffle_v3f16_v4f16__1_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__1_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s3, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__1_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s3, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -14796,21 +14796,21 @@ define void @s_shuffle_v3f16_v4f16__2_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__2_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__2_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -14854,22 +14854,22 @@ define void @s_shuffle_v3f16_v4f16__3_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__3_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s3, 16 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__3_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s3, 16 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -14905,18 +14905,18 @@ define void @s_shuffle_v3f16_v4f16__4_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__4_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__4_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -14954,19 +14954,19 @@ define void @s_shuffle_v3f16_v4f16__5_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__5_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__5_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -15002,18 +15002,18 @@ define void @s_shuffle_v3f16_v4f16__6_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__6_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__6_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -15049,18 +15049,18 @@ define void @s_shuffle_v3f16_v4f16__7_u_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -15102,21 +15102,21 @@ define void @s_shuffle_v3f16_v4f16__7_0_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -15160,22 +15160,22 @@ define void @s_shuffle_v3f16_v4f16__7_1_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s9, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s9, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -15217,21 +15217,21 @@ define void @s_shuffle_v3f16_v4f16__7_2_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -15275,22 +15275,22 @@ define void @s_shuffle_v3f16_v4f16__7_3_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s9, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s9, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -15326,18 +15326,18 @@ define void @s_shuffle_v3f16_v4f16__7_4_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -15375,19 +15375,19 @@ define void @s_shuffle_v3f16_v4f16__7_5_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> @@ -15423,18 +15423,18 @@ define void @s_shuffle_v3f16_v4f16__7_6_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f16_v4f16__7_6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v2f32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v2f32.ll index 47355ffdaed47..430f64164d24f 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v2f32.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v2f32.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v3f32_v2f32__u_u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v3f32_v2f32__0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -79,17 +79,17 @@ define void @v_shuffle_v3f32_v2f32__1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -132,17 +132,17 @@ define void @v_shuffle_v3f32_v2f32__3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -182,21 +182,21 @@ define void @v_shuffle_v3f32_v2f32__3_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__3_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__3_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -235,21 +235,21 @@ define void @v_shuffle_v3f32_v2f32__3_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__3_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__3_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -283,19 +283,19 @@ define void @v_shuffle_v3f32_v2f32__3_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__3_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__3_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -328,17 +328,17 @@ define void @v_shuffle_v3f32_v2f32__3_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__3_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__3_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -377,20 +377,20 @@ define void @v_shuffle_v3f32_v2f32__3_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__3_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__3_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -430,21 +430,21 @@ define void @v_shuffle_v3f32_v2f32__3_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__3_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__3_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -479,19 +479,19 @@ define void @v_shuffle_v3f32_v2f32__3_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__3_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__3_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -526,18 +526,18 @@ define void @v_shuffle_v3f32_v2f32__3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -571,18 +571,18 @@ define void @v_shuffle_v3f32_v2f32__u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -616,18 +616,18 @@ define void @v_shuffle_v3f32_v2f32__0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <3 x i32> zeroinitializer store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -661,19 +661,19 @@ define void @v_shuffle_v3f32_v2f32__1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -706,18 +706,18 @@ define void @v_shuffle_v3f32_v2f32__2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -757,21 +757,21 @@ define void @v_shuffle_v3f32_v2f32__3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -811,20 +811,20 @@ define void @v_shuffle_v3f32_v2f32__3_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__3_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__3_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -865,21 +865,21 @@ define void @v_shuffle_v3f32_v2f32__3_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__3_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__3_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -920,22 +920,22 @@ define void @v_shuffle_v3f32_v2f32__3_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__3_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__3_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -968,18 +968,18 @@ define void @v_shuffle_v3f32_v2f32__u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1011,18 +1011,18 @@ define void @v_shuffle_v3f32_v2f32__0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1056,18 +1056,18 @@ define void @v_shuffle_v3f32_v2f32__1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1099,18 +1099,18 @@ define void @v_shuffle_v3f32_v2f32__2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1150,21 +1150,21 @@ define void @v_shuffle_v3f32_v2f32__3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -1204,21 +1204,21 @@ define void @v_shuffle_v3f32_v2f32__3_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__3_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__3_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -1259,22 +1259,22 @@ define void @v_shuffle_v3f32_v2f32__3_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__3_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__3_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -1316,22 +1316,22 @@ define void @v_shuffle_v3f32_v2f32__3_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__3_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__3_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -1373,16 +1373,16 @@ define void @v_shuffle_v3f32_v2f32__0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1414,17 +1414,17 @@ define void @v_shuffle_v3f32_v2f32__1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1469,19 +1469,19 @@ define void @v_shuffle_v3f32_v2f32__3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -1515,18 +1515,18 @@ define void @v_shuffle_v3f32_v2f32__3_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__3_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx3 v1, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__3_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx3 v1, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -1567,22 +1567,22 @@ define void @v_shuffle_v3f32_v2f32__3_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__3_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__3_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -1621,21 +1621,21 @@ define void @v_shuffle_v3f32_v2f32__3_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__3_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__3_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -1668,18 +1668,18 @@ define void @v_shuffle_v3f32_v2f32__u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -1719,21 +1719,21 @@ define void @v_shuffle_v3f32_v2f32__0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -1774,21 +1774,21 @@ define void @v_shuffle_v3f32_v2f32__1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -1821,18 +1821,18 @@ define void @v_shuffle_v3f32_v2f32__2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -1866,18 +1866,18 @@ define void @v_shuffle_v3f32_v2f32__3_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__3_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__3_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -1919,22 +1919,22 @@ define void @v_shuffle_v3f32_v2f32__3_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__3_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__3_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -1975,21 +1975,21 @@ define void @v_shuffle_v3f32_v2f32__3_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__3_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__3_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -2024,19 +2024,19 @@ define void @v_shuffle_v3f32_v2f32__3_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v2f32__3_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v2f32__3_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -2081,17 +2081,17 @@ define void @s_shuffle_v3f32_v2f32__0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v2f32__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v2f32__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x float> %shuf) @@ -2123,17 +2123,17 @@ define void @s_shuffle_v3f32_v2f32__1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v2f32__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v2f32__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x float> %shuf) @@ -2179,17 +2179,17 @@ define void @s_shuffle_v3f32_v2f32__3_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v2f32__3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v2f32__3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -2230,21 +2230,21 @@ define void @s_shuffle_v3f32_v2f32__3_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v2f32__3_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v2f32__3_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -2283,20 +2283,20 @@ define void @s_shuffle_v3f32_v2f32__3_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v2f32__3_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v2f32__3_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -2331,18 +2331,18 @@ define void @s_shuffle_v3f32_v2f32__3_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v2f32__3_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v2f32__3_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -2402,21 +2402,21 @@ define void @s_shuffle_v3f32_v2f32__3_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v2f32__3_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s9 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v2f32__3_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s9 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -2457,21 +2457,21 @@ define void @s_shuffle_v3f32_v2f32__3_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v2f32__3_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s9 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v2f32__3_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s9 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -2508,19 +2508,19 @@ define void @s_shuffle_v3f32_v2f32__3_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v2f32__3_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v2f32__3_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -2575,18 +2575,18 @@ define void @s_shuffle_v3f32_v2f32__u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v2f32__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v2f32__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x float> %shuf) @@ -2641,19 +2641,19 @@ define void @s_shuffle_v3f32_v2f32__1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v2f32__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v2f32__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x float> %shuf) @@ -2687,18 +2687,18 @@ define void @s_shuffle_v3f32_v2f32__2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v2f32__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v2f32__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x float> %shuf) @@ -2740,22 +2740,22 @@ define void @s_shuffle_v3f32_v2f32__3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v2f32__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v2f32__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -2796,21 +2796,21 @@ define void @s_shuffle_v3f32_v2f32__3_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v2f32__3_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v2f32__3_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -2853,22 +2853,22 @@ define void @s_shuffle_v3f32_v2f32__3_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v2f32__3_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v2f32__3_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -2911,22 +2911,22 @@ define void @s_shuffle_v3f32_v2f32__3_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v2f32__3_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v2f32__3_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -3040,21 +3040,21 @@ define void @s_shuffle_v3f32_v2f32__3_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v2f32__3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v2f32__3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -3095,21 +3095,21 @@ define void @s_shuffle_v3f32_v2f32__3_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v2f32__3_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v2f32__3_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -3152,22 +3152,22 @@ define void @s_shuffle_v3f32_v2f32__3_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v2f32__3_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v2f32__3_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -3210,22 +3210,22 @@ define void @s_shuffle_v3f32_v2f32__3_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v2f32__3_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v2f32__3_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -3270,17 +3270,17 @@ define void @s_shuffle_v3f32_v2f32__0_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v2f32__0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v2f32__0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x float> %shuf) @@ -3312,17 +3312,17 @@ define void @s_shuffle_v3f32_v2f32__1_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v2f32__1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v2f32__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x float> %shuf) @@ -3372,19 +3372,19 @@ define void @s_shuffle_v3f32_v2f32__3_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v2f32__3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v2f32__3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -3419,18 +3419,18 @@ define void @s_shuffle_v3f32_v2f32__3_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v2f32__3_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v2f32__3_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -3473,22 +3473,22 @@ define void @s_shuffle_v3f32_v2f32__3_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v2f32__3_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v2f32__3_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -3529,21 +3529,21 @@ define void @s_shuffle_v3f32_v2f32__3_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v2f32__3_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v2f32__3_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -3603,21 +3603,21 @@ define void @s_shuffle_v3f32_v2f32__0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v2f32__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v2f32__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -3658,21 +3658,21 @@ define void @s_shuffle_v3f32_v2f32__1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v2f32__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v2f32__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -3726,18 +3726,18 @@ define void @s_shuffle_v3f32_v2f32__3_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v2f32__3_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v2f32__3_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -3780,22 +3780,22 @@ define void @s_shuffle_v3f32_v2f32__3_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v2f32__3_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v2f32__3_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -3836,21 +3836,21 @@ define void @s_shuffle_v3f32_v2f32__3_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v2f32__3_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v2f32__3_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> @@ -3887,19 +3887,19 @@ define void @s_shuffle_v3f32_v2f32__3_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v2f32__3_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v2f32__3_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <3 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v3f32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v3f32.ll index 1160e05a0480d..ef670e963bdb6 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v3f32.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v3f32.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v3f32_v3f32__u_u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v3f32_v3f32__0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -79,17 +79,17 @@ define void @v_shuffle_v3f32_v3f32__1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -121,17 +121,17 @@ define void @v_shuffle_v3f32_v3f32__2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -174,17 +174,17 @@ define void @v_shuffle_v3f32_v3f32__4_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -217,17 +217,17 @@ define void @v_shuffle_v3f32_v3f32__5_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -268,22 +268,22 @@ define void @v_shuffle_v3f32_v3f32__5_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -322,21 +322,21 @@ define void @v_shuffle_v3f32_v3f32__5_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -377,21 +377,21 @@ define void @v_shuffle_v3f32_v3f32__5_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -425,18 +425,18 @@ define void @v_shuffle_v3f32_v3f32__5_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -469,17 +469,17 @@ define void @v_shuffle_v3f32_v3f32__5_4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -514,18 +514,18 @@ define void @v_shuffle_v3f32_v3f32__5_5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -568,23 +568,23 @@ define void @v_shuffle_v3f32_v3f32__5_5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -627,23 +627,23 @@ define void @v_shuffle_v3f32_v3f32__5_5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -684,22 +684,22 @@ define void @v_shuffle_v3f32_v3f32__5_5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -735,20 +735,20 @@ define void @v_shuffle_v3f32_v3f32__5_5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -785,20 +785,20 @@ define void @v_shuffle_v3f32_v3f32__5_5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: v_mov_b32_e32 v6, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: v_mov_b32_e32 v6, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -833,18 +833,18 @@ define void @v_shuffle_v3f32_v3f32__5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -879,18 +879,18 @@ define void @v_shuffle_v3f32_v3f32__u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -924,18 +924,18 @@ define void @v_shuffle_v3f32_v3f32__0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <3 x i32> zeroinitializer store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -970,19 +970,19 @@ define void @v_shuffle_v3f32_v3f32__1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1017,19 +1017,19 @@ define void @v_shuffle_v3f32_v3f32__2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1063,18 +1063,18 @@ define void @v_shuffle_v3f32_v3f32__3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1116,23 +1116,23 @@ define void @v_shuffle_v3f32_v3f32__4_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -1175,23 +1175,23 @@ define void @v_shuffle_v3f32_v3f32__5_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -1232,22 +1232,22 @@ define void @v_shuffle_v3f32_v3f32__5_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -1290,23 +1290,23 @@ define void @v_shuffle_v3f32_v3f32__5_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -1348,22 +1348,22 @@ define void @v_shuffle_v3f32_v3f32__5_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -1405,21 +1405,21 @@ define void @v_shuffle_v3f32_v3f32__5_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -1460,22 +1460,22 @@ define void @v_shuffle_v3f32_v3f32__5_4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -1508,17 +1508,17 @@ define void @v_shuffle_v3f32_v3f32__u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1550,17 +1550,17 @@ define void @v_shuffle_v3f32_v3f32__0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1594,18 +1594,18 @@ define void @v_shuffle_v3f32_v3f32__1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1639,18 +1639,18 @@ define void @v_shuffle_v3f32_v3f32__2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1682,17 +1682,17 @@ define void @v_shuffle_v3f32_v3f32__3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1732,22 +1732,22 @@ define void @v_shuffle_v3f32_v3f32__4_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -1788,22 +1788,22 @@ define void @v_shuffle_v3f32_v3f32__5_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -1844,22 +1844,22 @@ define void @v_shuffle_v3f32_v3f32__5_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -1902,23 +1902,23 @@ define void @v_shuffle_v3f32_v3f32__5_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -1961,22 +1961,22 @@ define void @v_shuffle_v3f32_v3f32__5_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: v_mov_b32_e32 v6, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: v_mov_b32_e32 v6, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -2018,21 +2018,21 @@ define void @v_shuffle_v3f32_v3f32__5_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -2073,22 +2073,22 @@ define void @v_shuffle_v3f32_v3f32__5_4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -2121,17 +2121,17 @@ define void @v_shuffle_v3f32_v3f32__u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2163,17 +2163,17 @@ define void @v_shuffle_v3f32_v3f32__0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2207,18 +2207,18 @@ define void @v_shuffle_v3f32_v3f32__1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2252,18 +2252,18 @@ define void @v_shuffle_v3f32_v3f32__2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2295,17 +2295,17 @@ define void @v_shuffle_v3f32_v3f32__3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2345,21 +2345,21 @@ define void @v_shuffle_v3f32_v3f32__4_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -2400,21 +2400,21 @@ define void @v_shuffle_v3f32_v3f32__5_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -2453,21 +2453,21 @@ define void @v_shuffle_v3f32_v3f32__5_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -2510,22 +2510,22 @@ define void @v_shuffle_v3f32_v3f32__5_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -2564,21 +2564,21 @@ define void @v_shuffle_v3f32_v3f32__5_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -2619,22 +2619,22 @@ define void @v_shuffle_v3f32_v3f32__5_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -2675,21 +2675,21 @@ define void @v_shuffle_v3f32_v3f32__5_4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -2731,16 +2731,16 @@ define void @v_shuffle_v3f32_v3f32__0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2772,17 +2772,17 @@ define void @v_shuffle_v3f32_v3f32__1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2814,17 +2814,17 @@ define void @v_shuffle_v3f32_v3f32__2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2870,19 +2870,19 @@ define void @v_shuffle_v3f32_v3f32__4_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -2918,19 +2918,19 @@ define void @v_shuffle_v3f32_v3f32__5_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -2964,18 +2964,18 @@ define void @v_shuffle_v3f32_v3f32__5_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v3, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v3, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -3017,21 +3017,21 @@ define void @v_shuffle_v3f32_v3f32__5_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -3070,21 +3070,21 @@ define void @v_shuffle_v3f32_v3f32__5_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -3127,22 +3127,22 @@ define void @v_shuffle_v3f32_v3f32__5_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -3177,19 +3177,19 @@ define void @v_shuffle_v3f32_v3f32__5_4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -3222,17 +3222,17 @@ define void @v_shuffle_v3f32_v3f32__u_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -3272,22 +3272,22 @@ define void @v_shuffle_v3f32_v3f32__0_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -3328,22 +3328,22 @@ define void @v_shuffle_v3f32_v3f32__1_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -3384,21 +3384,21 @@ define void @v_shuffle_v3f32_v3f32__2_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -3431,17 +3431,17 @@ define void @v_shuffle_v3f32_v3f32__3_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -3476,18 +3476,18 @@ define void @v_shuffle_v3f32_v3f32__4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -3522,18 +3522,18 @@ define void @v_shuffle_v3f32_v3f32__5_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -3568,18 +3568,18 @@ define void @v_shuffle_v3f32_v3f32__5_u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -3620,21 +3620,21 @@ define void @v_shuffle_v3f32_v3f32__5_0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -3675,22 +3675,22 @@ define void @v_shuffle_v3f32_v3f32__5_1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -3733,22 +3733,22 @@ define void @v_shuffle_v3f32_v3f32__5_2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -3783,19 +3783,19 @@ define void @v_shuffle_v3f32_v3f32__5_3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -3828,17 +3828,17 @@ define void @v_shuffle_v3f32_v3f32__u_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -3879,22 +3879,22 @@ define void @v_shuffle_v3f32_v3f32__0_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -3935,22 +3935,22 @@ define void @v_shuffle_v3f32_v3f32__1_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -3991,21 +3991,21 @@ define void @v_shuffle_v3f32_v3f32__2_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, v6 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, v6 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -4038,17 +4038,17 @@ define void @v_shuffle_v3f32_v3f32__3_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -4083,18 +4083,18 @@ define void @v_shuffle_v3f32_v3f32__4_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -4127,17 +4127,17 @@ define void @v_shuffle_v3f32_v3f32__5_u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -4178,22 +4178,22 @@ define void @v_shuffle_v3f32_v3f32__5_0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -4234,22 +4234,22 @@ define void @v_shuffle_v3f32_v3f32__5_1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -4290,21 +4290,21 @@ define void @v_shuffle_v3f32_v3f32__5_2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -4341,20 +4341,20 @@ define void @v_shuffle_v3f32_v3f32__5_3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -4387,17 +4387,17 @@ define void @v_shuffle_v3f32_v3f32__5_4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v3f32__5_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -4442,17 +4442,17 @@ define void @s_shuffle_v3f32_v3f32__0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x float> %shuf) @@ -4484,17 +4484,17 @@ define void @s_shuffle_v3f32_v3f32__1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x float> %shuf) @@ -4526,17 +4526,17 @@ define void @s_shuffle_v3f32_v3f32__2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x float> %shuf) @@ -4582,17 +4582,17 @@ define void @s_shuffle_v3f32_v3f32__4_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -4625,17 +4625,17 @@ define void @s_shuffle_v3f32_v3f32__5_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -4676,21 +4676,21 @@ define void @s_shuffle_v3f32_v3f32__5_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -4729,20 +4729,20 @@ define void @s_shuffle_v3f32_v3f32__5_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -4783,21 +4783,21 @@ define void @s_shuffle_v3f32_v3f32__5_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -4832,18 +4832,18 @@ define void @s_shuffle_v3f32_v3f32__5_3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -4897,18 +4897,18 @@ define void @s_shuffle_v3f32_v3f32__5_5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -4951,22 +4951,22 @@ define void @s_shuffle_v3f32_v3f32__5_5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -5009,22 +5009,22 @@ define void @s_shuffle_v3f32_v3f32__5_5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -5065,21 +5065,21 @@ define void @s_shuffle_v3f32_v3f32__5_5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -5116,19 +5116,19 @@ define void @s_shuffle_v3f32_v3f32__5_5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -5165,19 +5165,19 @@ define void @s_shuffle_v3f32_v3f32__5_5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -5232,18 +5232,18 @@ define void @s_shuffle_v3f32_v3f32__u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x float> %shuf) @@ -5298,19 +5298,19 @@ define void @s_shuffle_v3f32_v3f32__1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x float> %shuf) @@ -5346,19 +5346,19 @@ define void @s_shuffle_v3f32_v3f32__2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x float> %shuf) @@ -5392,18 +5392,18 @@ define void @s_shuffle_v3f32_v3f32__3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x float> %shuf) @@ -5445,22 +5445,22 @@ define void @s_shuffle_v3f32_v3f32__4_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -5503,22 +5503,22 @@ define void @s_shuffle_v3f32_v3f32__5_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -5559,21 +5559,21 @@ define void @s_shuffle_v3f32_v3f32__5_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -5616,22 +5616,22 @@ define void @s_shuffle_v3f32_v3f32__5_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -5674,22 +5674,22 @@ define void @s_shuffle_v3f32_v3f32__5_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -5732,22 +5732,22 @@ define void @s_shuffle_v3f32_v3f32__5_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -5788,21 +5788,21 @@ define void @s_shuffle_v3f32_v3f32__5_4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -5935,21 +5935,21 @@ define void @s_shuffle_v3f32_v3f32__4_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -5990,21 +5990,21 @@ define void @s_shuffle_v3f32_v3f32__5_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -6045,21 +6045,21 @@ define void @s_shuffle_v3f32_v3f32__5_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -6102,22 +6102,22 @@ define void @s_shuffle_v3f32_v3f32__5_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -6160,22 +6160,22 @@ define void @s_shuffle_v3f32_v3f32__5_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -6218,22 +6218,22 @@ define void @s_shuffle_v3f32_v3f32__5_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -6274,21 +6274,21 @@ define void @s_shuffle_v3f32_v3f32__5_4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -6421,21 +6421,21 @@ define void @s_shuffle_v3f32_v3f32__4_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -6476,21 +6476,21 @@ define void @s_shuffle_v3f32_v3f32__5_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -6529,20 +6529,20 @@ define void @s_shuffle_v3f32_v3f32__5_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -6585,22 +6585,22 @@ define void @s_shuffle_v3f32_v3f32__5_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -6639,20 +6639,20 @@ define void @s_shuffle_v3f32_v3f32__5_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -6693,21 +6693,21 @@ define void @s_shuffle_v3f32_v3f32__5_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -6748,21 +6748,21 @@ define void @s_shuffle_v3f32_v3f32__5_4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -6807,17 +6807,17 @@ define void @s_shuffle_v3f32_v3f32__0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x float> %shuf) @@ -6849,17 +6849,17 @@ define void @s_shuffle_v3f32_v3f32__1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x float> %shuf) @@ -6891,17 +6891,17 @@ define void @s_shuffle_v3f32_v3f32__2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x float> %shuf) @@ -6951,19 +6951,19 @@ define void @s_shuffle_v3f32_v3f32__4_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -7000,19 +7000,19 @@ define void @s_shuffle_v3f32_v3f32__5_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -7047,18 +7047,18 @@ define void @s_shuffle_v3f32_v3f32__5_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -7101,22 +7101,22 @@ define void @s_shuffle_v3f32_v3f32__5_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -7157,21 +7157,21 @@ define void @s_shuffle_v3f32_v3f32__5_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -7214,22 +7214,22 @@ define void @s_shuffle_v3f32_v3f32__5_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -7266,19 +7266,19 @@ define void @s_shuffle_v3f32_v3f32__5_4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -7338,21 +7338,21 @@ define void @s_shuffle_v3f32_v3f32__0_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -7393,21 +7393,21 @@ define void @s_shuffle_v3f32_v3f32__1_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -7448,21 +7448,21 @@ define void @s_shuffle_v3f32_v3f32__2_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -7556,18 +7556,18 @@ define void @s_shuffle_v3f32_v3f32__5_u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -7610,22 +7610,22 @@ define void @s_shuffle_v3f32_v3f32__5_0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -7666,21 +7666,21 @@ define void @s_shuffle_v3f32_v3f32__5_1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -7723,22 +7723,22 @@ define void @s_shuffle_v3f32_v3f32__5_2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -7775,19 +7775,19 @@ define void @s_shuffle_v3f32_v3f32__5_3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -7847,21 +7847,21 @@ define void @s_shuffle_v3f32_v3f32__0_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -7902,21 +7902,21 @@ define void @s_shuffle_v3f32_v3f32__1_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -7957,21 +7957,21 @@ define void @s_shuffle_v3f32_v3f32__2_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -8070,21 +8070,21 @@ define void @s_shuffle_v3f32_v3f32__5_0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -8125,21 +8125,21 @@ define void @s_shuffle_v3f32_v3f32__5_1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -8180,21 +8180,21 @@ define void @s_shuffle_v3f32_v3f32__5_2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> @@ -8231,19 +8231,19 @@ define void @s_shuffle_v3f32_v3f32__5_3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v3f32__5_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v3f32__5_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <3 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v4f32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v4f32.ll index 1a00cbb7ad174..50c69de069986 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v4f32.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v4f32.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v3f32_v4f32__u_u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v3f32_v4f32__0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -78,17 +78,17 @@ define void @v_shuffle_v3f32_v4f32__1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -120,17 +120,17 @@ define void @v_shuffle_v3f32_v4f32__2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -162,17 +162,17 @@ define void @v_shuffle_v3f32_v4f32__3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -214,17 +214,17 @@ define void @v_shuffle_v3f32_v4f32__5_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -257,17 +257,17 @@ define void @v_shuffle_v3f32_v4f32__6_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__6_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__6_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -300,17 +300,17 @@ define void @v_shuffle_v3f32_v4f32__7_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -351,22 +351,22 @@ define void @v_shuffle_v3f32_v4f32__7_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -405,21 +405,21 @@ define void @v_shuffle_v3f32_v4f32__7_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -459,21 +459,21 @@ define void @v_shuffle_v3f32_v4f32__7_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -514,21 +514,21 @@ define void @v_shuffle_v3f32_v4f32__7_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -563,18 +563,18 @@ define void @v_shuffle_v3f32_v4f32__7_4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -607,17 +607,17 @@ define void @v_shuffle_v3f32_v4f32__7_5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -651,18 +651,18 @@ define void @v_shuffle_v3f32_v4f32__7_6_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -697,18 +697,18 @@ define void @v_shuffle_v3f32_v4f32__7_7_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -751,23 +751,23 @@ define void @v_shuffle_v3f32_v4f32__7_7_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -810,23 +810,23 @@ define void @v_shuffle_v3f32_v4f32__7_7_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -867,22 +867,22 @@ define void @v_shuffle_v3f32_v4f32__7_7_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -924,22 +924,22 @@ define void @v_shuffle_v3f32_v4f32__7_7_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -975,18 +975,18 @@ define void @v_shuffle_v3f32_v4f32__7_7_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -1021,18 +1021,18 @@ define void @v_shuffle_v3f32_v4f32__7_7_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -1067,18 +1067,18 @@ define void @v_shuffle_v3f32_v4f32__7_7_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -1114,19 +1114,19 @@ define void @v_shuffle_v3f32_v4f32__7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -1161,18 +1161,18 @@ define void @v_shuffle_v3f32_v4f32__u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1206,18 +1206,18 @@ define void @v_shuffle_v3f32_v4f32__0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> zeroinitializer store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1252,19 +1252,19 @@ define void @v_shuffle_v3f32_v4f32__1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1299,18 +1299,18 @@ define void @v_shuffle_v3f32_v4f32__2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1346,19 +1346,19 @@ define void @v_shuffle_v3f32_v4f32__3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1392,18 +1392,18 @@ define void @v_shuffle_v3f32_v4f32__4_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1444,23 +1444,23 @@ define void @v_shuffle_v3f32_v4f32__5_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -1503,23 +1503,23 @@ define void @v_shuffle_v3f32_v4f32__6_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__6_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__6_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -1562,23 +1562,23 @@ define void @v_shuffle_v3f32_v4f32__7_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -1619,22 +1619,22 @@ define void @v_shuffle_v3f32_v4f32__7_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -1677,23 +1677,23 @@ define void @v_shuffle_v3f32_v4f32__7_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -1735,22 +1735,22 @@ define void @v_shuffle_v3f32_v4f32__7_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -1792,21 +1792,21 @@ define void @v_shuffle_v3f32_v4f32__7_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -1849,22 +1849,22 @@ define void @v_shuffle_v3f32_v4f32__7_4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -1905,22 +1905,22 @@ define void @v_shuffle_v3f32_v4f32__7_5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -1962,23 +1962,23 @@ define void @v_shuffle_v3f32_v4f32__7_6_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -2011,17 +2011,17 @@ define void @v_shuffle_v3f32_v4f32__u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2053,17 +2053,17 @@ define void @v_shuffle_v3f32_v4f32__0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2097,18 +2097,18 @@ define void @v_shuffle_v3f32_v4f32__1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2142,18 +2142,18 @@ define void @v_shuffle_v3f32_v4f32__2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2187,18 +2187,18 @@ define void @v_shuffle_v3f32_v4f32__3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2230,17 +2230,17 @@ define void @v_shuffle_v3f32_v4f32__4_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2280,22 +2280,22 @@ define void @v_shuffle_v3f32_v4f32__5_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -2336,22 +2336,22 @@ define void @v_shuffle_v3f32_v4f32__6_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__6_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__6_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -2392,22 +2392,22 @@ define void @v_shuffle_v3f32_v4f32__7_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -2448,22 +2448,22 @@ define void @v_shuffle_v3f32_v4f32__7_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -2506,23 +2506,23 @@ define void @v_shuffle_v3f32_v4f32__7_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -2565,22 +2565,22 @@ define void @v_shuffle_v3f32_v4f32__7_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: v_mov_b32_e32 v6, v1 -; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: v_mov_b32_e32 v6, v1 +; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -2621,21 +2621,21 @@ define void @v_shuffle_v3f32_v4f32__7_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -2678,22 +2678,22 @@ define void @v_shuffle_v3f32_v4f32__7_4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -2734,22 +2734,22 @@ define void @v_shuffle_v3f32_v4f32__7_5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -2791,23 +2791,23 @@ define void @v_shuffle_v3f32_v4f32__7_6_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -2840,17 +2840,17 @@ define void @v_shuffle_v3f32_v4f32__u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2882,17 +2882,17 @@ define void @v_shuffle_v3f32_v4f32__0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2925,18 +2925,18 @@ define void @v_shuffle_v3f32_v4f32__1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2970,18 +2970,18 @@ define void @v_shuffle_v3f32_v4f32__2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3015,18 +3015,18 @@ define void @v_shuffle_v3f32_v4f32__3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3058,17 +3058,17 @@ define void @v_shuffle_v3f32_v4f32__4_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3108,21 +3108,21 @@ define void @v_shuffle_v3f32_v4f32__5_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -3163,21 +3163,21 @@ define void @v_shuffle_v3f32_v4f32__6_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__6_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__6_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -3218,21 +3218,21 @@ define void @v_shuffle_v3f32_v4f32__7_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -3271,21 +3271,21 @@ define void @v_shuffle_v3f32_v4f32__7_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -3328,22 +3328,22 @@ define void @v_shuffle_v3f32_v4f32__7_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -3382,21 +3382,21 @@ define void @v_shuffle_v3f32_v4f32__7_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -3437,21 +3437,21 @@ define void @v_shuffle_v3f32_v4f32__7_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -3492,22 +3492,22 @@ define void @v_shuffle_v3f32_v4f32__7_4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -3548,21 +3548,21 @@ define void @v_shuffle_v3f32_v4f32__7_5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -3603,22 +3603,22 @@ define void @v_shuffle_v3f32_v4f32__7_6_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -3652,18 +3652,18 @@ define void @v_shuffle_v3f32_v4f32__u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3697,18 +3697,18 @@ define void @v_shuffle_v3f32_v4f32__0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3742,19 +3742,19 @@ define void @v_shuffle_v3f32_v4f32__1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3789,19 +3789,19 @@ define void @v_shuffle_v3f32_v4f32__2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3836,19 +3836,19 @@ define void @v_shuffle_v3f32_v4f32__3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3881,18 +3881,18 @@ define void @v_shuffle_v3f32_v4f32__4_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3933,22 +3933,22 @@ define void @v_shuffle_v3f32_v4f32__5_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -3990,22 +3990,22 @@ define void @v_shuffle_v3f32_v4f32__6_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__6_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__6_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -4047,22 +4047,22 @@ define void @v_shuffle_v3f32_v4f32__7_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -4102,21 +4102,21 @@ define void @v_shuffle_v3f32_v4f32__7_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -4158,22 +4158,22 @@ define void @v_shuffle_v3f32_v4f32__7_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -4214,21 +4214,21 @@ define void @v_shuffle_v3f32_v4f32__7_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -4269,22 +4269,22 @@ define void @v_shuffle_v3f32_v4f32__7_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -4326,22 +4326,22 @@ define void @v_shuffle_v3f32_v4f32__7_4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -4382,21 +4382,21 @@ define void @v_shuffle_v3f32_v4f32__7_5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -4438,22 +4438,22 @@ define void @v_shuffle_v3f32_v4f32__7_6_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -4495,16 +4495,16 @@ define void @v_shuffle_v3f32_v4f32__0_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4535,17 +4535,17 @@ define void @v_shuffle_v3f32_v4f32__1_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4577,17 +4577,17 @@ define void @v_shuffle_v3f32_v4f32__2_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4619,17 +4619,17 @@ define void @v_shuffle_v3f32_v4f32__3_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> store <3 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4675,19 +4675,19 @@ define void @v_shuffle_v3f32_v4f32__5_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -4723,18 +4723,18 @@ define void @v_shuffle_v3f32_v4f32__6_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__6_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__6_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -4771,19 +4771,19 @@ define void @v_shuffle_v3f32_v4f32__7_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -4818,18 +4818,18 @@ define void @v_shuffle_v3f32_v4f32__7_u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -4872,23 +4872,23 @@ define void @v_shuffle_v3f32_v4f32__7_0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -4927,21 +4927,21 @@ define void @v_shuffle_v3f32_v4f32__7_1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -4982,22 +4982,22 @@ define void @v_shuffle_v3f32_v4f32__7_2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -5040,22 +5040,22 @@ define void @v_shuffle_v3f32_v4f32__7_3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -5092,19 +5092,19 @@ define void @v_shuffle_v3f32_v4f32__7_5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -5140,20 +5140,20 @@ define void @v_shuffle_v3f32_v4f32__7_6_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -5186,17 +5186,17 @@ define void @v_shuffle_v3f32_v4f32__u_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -5236,22 +5236,22 @@ define void @v_shuffle_v3f32_v4f32__0_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -5291,22 +5291,22 @@ define void @v_shuffle_v3f32_v4f32__1_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -5347,21 +5347,21 @@ define void @v_shuffle_v3f32_v4f32__2_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -5402,21 +5402,21 @@ define void @v_shuffle_v3f32_v4f32__3_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -5449,17 +5449,17 @@ define void @v_shuffle_v3f32_v4f32__4_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -5494,18 +5494,18 @@ define void @v_shuffle_v3f32_v4f32__5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -5540,18 +5540,18 @@ define void @v_shuffle_v3f32_v4f32__6_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__6_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__6_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -5586,18 +5586,18 @@ define void @v_shuffle_v3f32_v4f32__7_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -5632,18 +5632,18 @@ define void @v_shuffle_v3f32_v4f32__7_u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -5686,23 +5686,23 @@ define void @v_shuffle_v3f32_v4f32__7_0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -5743,22 +5743,22 @@ define void @v_shuffle_v3f32_v4f32__7_1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -5800,22 +5800,22 @@ define void @v_shuffle_v3f32_v4f32__7_2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -5858,22 +5858,22 @@ define void @v_shuffle_v3f32_v4f32__7_3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -5910,19 +5910,19 @@ define void @v_shuffle_v3f32_v4f32__7_4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -5958,20 +5958,20 @@ define void @v_shuffle_v3f32_v4f32__7_6_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: v_mov_b32_e32 v6, v1 -; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: v_mov_b32_e32 v6, v1 +; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -6004,17 +6004,17 @@ define void @v_shuffle_v3f32_v4f32__u_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__u_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__u_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -6055,22 +6055,22 @@ define void @v_shuffle_v3f32_v4f32__0_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__0_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__0_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -6111,22 +6111,22 @@ define void @v_shuffle_v3f32_v4f32__1_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__1_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__1_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -6167,21 +6167,21 @@ define void @v_shuffle_v3f32_v4f32__2_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__2_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, v6 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__2_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, v6 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -6222,21 +6222,21 @@ define void @v_shuffle_v3f32_v4f32__3_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__3_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, v6 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__3_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, v6 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -6269,17 +6269,17 @@ define void @v_shuffle_v3f32_v4f32__4_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__4_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__4_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -6313,18 +6313,18 @@ define void @v_shuffle_v3f32_v4f32__5_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__5_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__5_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -6359,18 +6359,18 @@ define void @v_shuffle_v3f32_v4f32__6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -6405,18 +6405,18 @@ define void @v_shuffle_v3f32_v4f32__7_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -6449,17 +6449,17 @@ define void @v_shuffle_v3f32_v4f32__7_u_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -6500,22 +6500,22 @@ define void @v_shuffle_v3f32_v4f32__7_0_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -6556,22 +6556,22 @@ define void @v_shuffle_v3f32_v4f32__7_1_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -6612,21 +6612,21 @@ define void @v_shuffle_v3f32_v4f32__7_2_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -6667,21 +6667,21 @@ define void @v_shuffle_v3f32_v4f32__7_3_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -6717,20 +6717,20 @@ define void @v_shuffle_v3f32_v4f32__7_4_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -6763,17 +6763,17 @@ define void @v_shuffle_v3f32_v4f32__7_5_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -6807,18 +6807,18 @@ define void @v_shuffle_v3f32_v4f32__u_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__u_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__u_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -6859,22 +6859,22 @@ define void @v_shuffle_v3f32_v4f32__0_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__0_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__0_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -6916,22 +6916,22 @@ define void @v_shuffle_v3f32_v4f32__1_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__1_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__1_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -6973,22 +6973,22 @@ define void @v_shuffle_v3f32_v4f32__2_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__2_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__2_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -7030,22 +7030,22 @@ define void @v_shuffle_v3f32_v4f32__3_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__3_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__3_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -7080,18 +7080,18 @@ define void @v_shuffle_v3f32_v4f32__4_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__4_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__4_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -7126,19 +7126,19 @@ define void @v_shuffle_v3f32_v4f32__5_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__5_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__5_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -7174,19 +7174,19 @@ define void @v_shuffle_v3f32_v4f32__6_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__6_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__6_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -7220,18 +7220,18 @@ define void @v_shuffle_v3f32_v4f32__7_u_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -7273,23 +7273,23 @@ define void @v_shuffle_v3f32_v4f32__7_0_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -7330,22 +7330,22 @@ define void @v_shuffle_v3f32_v4f32__7_1_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -7387,22 +7387,22 @@ define void @v_shuffle_v3f32_v4f32__7_2_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -7444,22 +7444,22 @@ define void @v_shuffle_v3f32_v4f32__7_3_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -7495,20 +7495,20 @@ define void @v_shuffle_v3f32_v4f32__7_4_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -7543,18 +7543,18 @@ define void @v_shuffle_v3f32_v4f32__7_5_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -7589,19 +7589,19 @@ define void @v_shuffle_v3f32_v4f32__7_6_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3f32_v4f32__7_6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -7646,17 +7646,17 @@ define void @s_shuffle_v3f32_v4f32__0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x float> %shuf) @@ -7688,17 +7688,17 @@ define void @s_shuffle_v3f32_v4f32__1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x float> %shuf) @@ -7730,17 +7730,17 @@ define void @s_shuffle_v3f32_v4f32__2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x float> %shuf) @@ -7772,17 +7772,17 @@ define void @s_shuffle_v3f32_v4f32__3_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x float> %shuf) @@ -7828,17 +7828,17 @@ define void @s_shuffle_v3f32_v4f32__5_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -7871,17 +7871,17 @@ define void @s_shuffle_v3f32_v4f32__6_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__6_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__6_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -7914,17 +7914,17 @@ define void @s_shuffle_v3f32_v4f32__7_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -7965,21 +7965,21 @@ define void @s_shuffle_v3f32_v4f32__7_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -8018,20 +8018,20 @@ define void @s_shuffle_v3f32_v4f32__7_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -8072,21 +8072,21 @@ define void @s_shuffle_v3f32_v4f32__7_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -8127,21 +8127,21 @@ define void @s_shuffle_v3f32_v4f32__7_3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -8176,18 +8176,18 @@ define void @s_shuffle_v3f32_v4f32__7_4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -8241,18 +8241,18 @@ define void @s_shuffle_v3f32_v4f32__7_6_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -8287,18 +8287,18 @@ define void @s_shuffle_v3f32_v4f32__7_7_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -8341,22 +8341,22 @@ define void @s_shuffle_v3f32_v4f32__7_7_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -8399,22 +8399,22 @@ define void @s_shuffle_v3f32_v4f32__7_7_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -8455,21 +8455,21 @@ define void @s_shuffle_v3f32_v4f32__7_7_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -8512,22 +8512,22 @@ define void @s_shuffle_v3f32_v4f32__7_7_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -8564,19 +8564,19 @@ define void @s_shuffle_v3f32_v4f32__7_7_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -8613,19 +8613,19 @@ define void @s_shuffle_v3f32_v4f32__7_7_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -8682,19 +8682,19 @@ define void @s_shuffle_v3f32_v4f32__7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -8729,18 +8729,18 @@ define void @s_shuffle_v3f32_v4f32__u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x float> %shuf) @@ -8795,19 +8795,19 @@ define void @s_shuffle_v3f32_v4f32__1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x float> %shuf) @@ -8843,19 +8843,19 @@ define void @s_shuffle_v3f32_v4f32__2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x float> %shuf) @@ -8891,19 +8891,19 @@ define void @s_shuffle_v3f32_v4f32__3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x float> %shuf) @@ -8937,18 +8937,18 @@ define void @s_shuffle_v3f32_v4f32__4_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x float> %shuf) @@ -8990,22 +8990,22 @@ define void @s_shuffle_v3f32_v4f32__5_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -9048,22 +9048,22 @@ define void @s_shuffle_v3f32_v4f32__6_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__6_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__6_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -9106,22 +9106,22 @@ define void @s_shuffle_v3f32_v4f32__7_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -9162,21 +9162,21 @@ define void @s_shuffle_v3f32_v4f32__7_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -9219,22 +9219,22 @@ define void @s_shuffle_v3f32_v4f32__7_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -9277,22 +9277,22 @@ define void @s_shuffle_v3f32_v4f32__7_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -9335,22 +9335,22 @@ define void @s_shuffle_v3f32_v4f32__7_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -9393,22 +9393,22 @@ define void @s_shuffle_v3f32_v4f32__7_4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -9449,21 +9449,21 @@ define void @s_shuffle_v3f32_v4f32__7_5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -9506,22 +9506,22 @@ define void @s_shuffle_v3f32_v4f32__7_6_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -9673,21 +9673,21 @@ define void @s_shuffle_v3f32_v4f32__5_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -9728,21 +9728,21 @@ define void @s_shuffle_v3f32_v4f32__6_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__6_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__6_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -9783,21 +9783,21 @@ define void @s_shuffle_v3f32_v4f32__7_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -9838,21 +9838,21 @@ define void @s_shuffle_v3f32_v4f32__7_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -9895,22 +9895,22 @@ define void @s_shuffle_v3f32_v4f32__7_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -9953,22 +9953,22 @@ define void @s_shuffle_v3f32_v4f32__7_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -10011,22 +10011,22 @@ define void @s_shuffle_v3f32_v4f32__7_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -10069,22 +10069,22 @@ define void @s_shuffle_v3f32_v4f32__7_4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -10125,21 +10125,21 @@ define void @s_shuffle_v3f32_v4f32__7_5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -10182,22 +10182,22 @@ define void @s_shuffle_v3f32_v4f32__7_6_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -10349,21 +10349,21 @@ define void @s_shuffle_v3f32_v4f32__5_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -10404,21 +10404,21 @@ define void @s_shuffle_v3f32_v4f32__6_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__6_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__6_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -10459,21 +10459,21 @@ define void @s_shuffle_v3f32_v4f32__7_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -10512,20 +10512,20 @@ define void @s_shuffle_v3f32_v4f32__7_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -10568,22 +10568,22 @@ define void @s_shuffle_v3f32_v4f32__7_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -10622,20 +10622,20 @@ define void @s_shuffle_v3f32_v4f32__7_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -10676,21 +10676,21 @@ define void @s_shuffle_v3f32_v4f32__7_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -10731,21 +10731,21 @@ define void @s_shuffle_v3f32_v4f32__7_4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -10786,21 +10786,21 @@ define void @s_shuffle_v3f32_v4f32__7_5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -10841,21 +10841,21 @@ define void @s_shuffle_v3f32_v4f32__7_6_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -10890,18 +10890,18 @@ define void @s_shuffle_v3f32_v4f32__u_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x float> %shuf) @@ -10956,19 +10956,19 @@ define void @s_shuffle_v3f32_v4f32__1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x float> %shuf) @@ -11004,19 +11004,19 @@ define void @s_shuffle_v3f32_v4f32__2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x float> %shuf) @@ -11052,19 +11052,19 @@ define void @s_shuffle_v3f32_v4f32__3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x float> %shuf) @@ -11098,18 +11098,18 @@ define void @s_shuffle_v3f32_v4f32__4_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x float> %shuf) @@ -11151,22 +11151,22 @@ define void @s_shuffle_v3f32_v4f32__5_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -11209,22 +11209,22 @@ define void @s_shuffle_v3f32_v4f32__6_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__6_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__6_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -11267,22 +11267,22 @@ define void @s_shuffle_v3f32_v4f32__7_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -11323,21 +11323,21 @@ define void @s_shuffle_v3f32_v4f32__7_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -11380,22 +11380,22 @@ define void @s_shuffle_v3f32_v4f32__7_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -11436,21 +11436,21 @@ define void @s_shuffle_v3f32_v4f32__7_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -11493,22 +11493,22 @@ define void @s_shuffle_v3f32_v4f32__7_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -11551,22 +11551,22 @@ define void @s_shuffle_v3f32_v4f32__7_4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -11607,21 +11607,21 @@ define void @s_shuffle_v3f32_v4f32__7_5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -11664,22 +11664,22 @@ define void @s_shuffle_v3f32_v4f32__7_6_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -11724,17 +11724,17 @@ define void @s_shuffle_v3f32_v4f32__0_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x float> %shuf) @@ -11766,17 +11766,17 @@ define void @s_shuffle_v3f32_v4f32__1_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x float> %shuf) @@ -11808,17 +11808,17 @@ define void @s_shuffle_v3f32_v4f32__2_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x float> %shuf) @@ -11850,17 +11850,17 @@ define void @s_shuffle_v3f32_v4f32__3_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x float> %shuf) @@ -11910,19 +11910,19 @@ define void @s_shuffle_v3f32_v4f32__5_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -11959,19 +11959,19 @@ define void @s_shuffle_v3f32_v4f32__6_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__6_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__6_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -12008,19 +12008,19 @@ define void @s_shuffle_v3f32_v4f32__7_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -12055,18 +12055,18 @@ define void @s_shuffle_v3f32_v4f32__7_u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -12109,22 +12109,22 @@ define void @s_shuffle_v3f32_v4f32__7_0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -12165,21 +12165,21 @@ define void @s_shuffle_v3f32_v4f32__7_1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -12222,22 +12222,22 @@ define void @s_shuffle_v3f32_v4f32__7_2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -12280,22 +12280,22 @@ define void @s_shuffle_v3f32_v4f32__7_3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -12332,19 +12332,19 @@ define void @s_shuffle_v3f32_v4f32__7_5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -12381,19 +12381,19 @@ define void @s_shuffle_v3f32_v4f32__7_6_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -12453,21 +12453,21 @@ define void @s_shuffle_v3f32_v4f32__0_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -12508,21 +12508,21 @@ define void @s_shuffle_v3f32_v4f32__1_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -12563,21 +12563,21 @@ define void @s_shuffle_v3f32_v4f32__2_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -12618,21 +12618,21 @@ define void @s_shuffle_v3f32_v4f32__3_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -12746,18 +12746,18 @@ define void @s_shuffle_v3f32_v4f32__7_u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -12800,22 +12800,22 @@ define void @s_shuffle_v3f32_v4f32__7_0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -12856,21 +12856,21 @@ define void @s_shuffle_v3f32_v4f32__7_1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -12913,22 +12913,22 @@ define void @s_shuffle_v3f32_v4f32__7_2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -12971,22 +12971,22 @@ define void @s_shuffle_v3f32_v4f32__7_3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -13023,19 +13023,19 @@ define void @s_shuffle_v3f32_v4f32__7_4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -13072,19 +13072,19 @@ define void @s_shuffle_v3f32_v4f32__7_6_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -13144,21 +13144,21 @@ define void @s_shuffle_v3f32_v4f32__0_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__0_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__0_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -13199,21 +13199,21 @@ define void @s_shuffle_v3f32_v4f32__1_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__1_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__1_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -13254,21 +13254,21 @@ define void @s_shuffle_v3f32_v4f32__2_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__2_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__2_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -13309,21 +13309,21 @@ define void @s_shuffle_v3f32_v4f32__3_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__3_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__3_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -13462,21 +13462,21 @@ define void @s_shuffle_v3f32_v4f32__7_0_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -13517,21 +13517,21 @@ define void @s_shuffle_v3f32_v4f32__7_1_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -13572,21 +13572,21 @@ define void @s_shuffle_v3f32_v4f32__7_2_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -13627,21 +13627,21 @@ define void @s_shuffle_v3f32_v4f32__7_3_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -13678,19 +13678,19 @@ define void @s_shuffle_v3f32_v4f32__7_4_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -13744,18 +13744,18 @@ define void @s_shuffle_v3f32_v4f32__u_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__u_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__u_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -13796,21 +13796,21 @@ define void @s_shuffle_v3f32_v4f32__0_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__0_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__0_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -13853,22 +13853,22 @@ define void @s_shuffle_v3f32_v4f32__1_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__1_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__1_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -13911,22 +13911,22 @@ define void @s_shuffle_v3f32_v4f32__2_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__2_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__2_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -13969,22 +13969,22 @@ define void @s_shuffle_v3f32_v4f32__3_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__3_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__3_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -14041,19 +14041,19 @@ define void @s_shuffle_v3f32_v4f32__5_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__5_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__5_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -14090,19 +14090,19 @@ define void @s_shuffle_v3f32_v4f32__6_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__6_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__6_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -14137,18 +14137,18 @@ define void @s_shuffle_v3f32_v4f32__7_u_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -14191,22 +14191,22 @@ define void @s_shuffle_v3f32_v4f32__7_0_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -14247,21 +14247,21 @@ define void @s_shuffle_v3f32_v4f32__7_1_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -14304,22 +14304,22 @@ define void @s_shuffle_v3f32_v4f32__7_2_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -14362,22 +14362,22 @@ define void @s_shuffle_v3f32_v4f32__7_3_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -14414,19 +14414,19 @@ define void @s_shuffle_v3f32_v4f32__7_4_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> @@ -14483,19 +14483,19 @@ define void @s_shuffle_v3f32_v4f32__7_6_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3f32_v4f32__7_6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3f32_v4f32__7_6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <3 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i16.v2i16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i16.v2i16.ll index 803637cb2dd36..7d5ba7c5f9d89 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i16.v2i16.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i16.v2i16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v3i16_v2i16__u_u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v3i16_v2i16__0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -79,17 +79,17 @@ define void @v_shuffle_v3i16_v2i16__1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v1, s0, v1, 16 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -132,17 +132,17 @@ define void @v_shuffle_v3i16_v2i16__3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v1, s0, v1, 16 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -181,21 +181,21 @@ define void @v_shuffle_v3i16_v2i16__3_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__3_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v2, 16 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__3_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v2, 16 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -236,22 +236,22 @@ define void @v_shuffle_v3i16_v2i16__3_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__3_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v2, s2 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__3_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v2, s2 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -284,17 +284,17 @@ define void @v_shuffle_v3i16_v2i16__3_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__3_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__3_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v1, 16 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -329,18 +329,18 @@ define void @v_shuffle_v3i16_v2i16__3_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__3_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__3_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -383,22 +383,22 @@ define void @v_shuffle_v3i16_v2i16__3_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__3_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v2, v2, v2, s2 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__3_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v2, v2, v2, s2 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -441,22 +441,22 @@ define void @v_shuffle_v3i16_v2i16__3_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__3_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v2, v2, v2, s2 -; GFX940-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__3_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v2, v2, v2, s2 +; GFX942-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -493,19 +493,19 @@ define void @v_shuffle_v3i16_v2i16__3_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__3_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__3_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -544,20 +544,20 @@ define void @v_shuffle_v3i16_v2i16__3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -594,19 +594,19 @@ define void @v_shuffle_v3i16_v2i16__u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -642,19 +642,19 @@ define void @v_shuffle_v3i16_v2i16__0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <3 x i32> zeroinitializer store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -688,19 +688,19 @@ define void @v_shuffle_v3i16_v2i16__1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v1, 16 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v1, 16 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -736,19 +736,19 @@ define void @v_shuffle_v3i16_v2i16__2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -788,22 +788,22 @@ define void @v_shuffle_v3i16_v2i16__3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v2, 16 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v2, 16 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -844,22 +844,22 @@ define void @v_shuffle_v3i16_v2i16__3_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__3_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v2, s0, v2, 16 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__3_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v2, s0, v2, 16 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -902,22 +902,22 @@ define void @v_shuffle_v3i16_v2i16__3_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__3_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v2, v1, v2, s2 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__3_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v2, v1, v2, s2 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -958,22 +958,22 @@ define void @v_shuffle_v3i16_v2i16__3_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__3_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v2, v2, v2, 16 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__3_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v2, v2, v2, 16 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -1006,17 +1006,17 @@ define void @v_shuffle_v3i16_v2i16__u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1048,17 +1048,17 @@ define void @v_shuffle_v3i16_v2i16__0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1096,20 +1096,20 @@ define void @v_shuffle_v3i16_v2i16__1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1141,17 +1141,17 @@ define void @v_shuffle_v3i16_v2i16__2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1195,23 +1195,23 @@ define void @v_shuffle_v3i16_v2i16__3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v2, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v2, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -1252,22 +1252,22 @@ define void @v_shuffle_v3i16_v2i16__3_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__3_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v2, s0, v2, 16 -; GFX940-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__3_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v2, s0, v2, 16 +; GFX942-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -1308,22 +1308,22 @@ define void @v_shuffle_v3i16_v2i16__3_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__3_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v2, 16 -; GFX940-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__3_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v2, 16 +; GFX942-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -1364,22 +1364,22 @@ define void @v_shuffle_v3i16_v2i16__3_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__3_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v2, v2, v2, 16 -; GFX940-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__3_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v2, v2, v2, 16 +; GFX942-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -1421,16 +1421,16 @@ define void @v_shuffle_v3i16_v2i16__0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1462,17 +1462,17 @@ define void @v_shuffle_v3i16_v2i16__1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v1, s0, v1, 16 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1517,19 +1517,19 @@ define void @v_shuffle_v3i16_v2i16__3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v1, 16 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v1, 16 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -1564,19 +1564,19 @@ define void @v_shuffle_v3i16_v2i16__3_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__3_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v2, s0, v1, 16 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__3_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v2, s0, v1, 16 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -1617,22 +1617,22 @@ define void @v_shuffle_v3i16_v2i16__3_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__3_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v2, 16 -; GFX940-NEXT: global_store_short v0, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__3_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v2, 16 +; GFX942-NEXT: global_store_short v0, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -1675,22 +1675,22 @@ define void @v_shuffle_v3i16_v2i16__3_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__3_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v1, v2, s2 -; GFX940-NEXT: global_store_short v0, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__3_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v1, v2, s2 +; GFX942-NEXT: global_store_short v0, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -1723,17 +1723,17 @@ define void @v_shuffle_v3i16_v2i16__u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -1778,23 +1778,23 @@ define void @v_shuffle_v3i16_v2i16__0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v2 -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: global_store_short v0, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v1, v2 +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: global_store_short v0, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -1839,24 +1839,24 @@ define void @v_shuffle_v3i16_v2i16__1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v1, s2 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v2 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v1, s2 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -1889,17 +1889,17 @@ define void @v_shuffle_v3i16_v2i16__2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -1936,19 +1936,19 @@ define void @v_shuffle_v3i16_v2i16__3_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__3_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v2, s0, v1, 16 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__3_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v2, s0, v1, 16 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -1991,22 +1991,22 @@ define void @v_shuffle_v3i16_v2i16__3_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__3_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v2, 16 -; GFX940-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: global_store_short v0, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__3_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v2, 16 +; GFX942-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: global_store_short v0, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -2051,24 +2051,24 @@ define void @v_shuffle_v3i16_v2i16__3_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__3_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v2, s2 -; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v2 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__3_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v2, s2 +; GFX942-NEXT: global_store_dword v0, v1, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -2105,19 +2105,19 @@ define void @v_shuffle_v3i16_v2i16__3_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v2i16__3_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v1, 16 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v2i16__3_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v1, 16 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v0, v2, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -2163,17 +2163,17 @@ define void @s_shuffle_v3i16_v2i16__0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v2i16__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v2i16__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <3 x i32> %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> @@ -2206,17 +2206,17 @@ define void @s_shuffle_v3i16_v2i16__1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v2i16__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v2i16__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <3 x i32> %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> @@ -2264,17 +2264,17 @@ define void @s_shuffle_v3i16_v2i16__3_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v2i16__3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v2i16__3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -2316,21 +2316,21 @@ define void @s_shuffle_v3i16_v2i16__3_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v2i16__3_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v2i16__3_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -2370,20 +2370,20 @@ define void @s_shuffle_v3i16_v2i16__3_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v2i16__3_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v2i16__3_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -2419,18 +2419,18 @@ define void @s_shuffle_v3i16_v2i16__3_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v2i16__3_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v2i16__3_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -2464,17 +2464,17 @@ define void @s_shuffle_v3i16_v2i16__3_3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v2i16__3_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v2i16__3_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -2514,20 +2514,20 @@ define void @s_shuffle_v3i16_v2i16__3_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v2i16__3_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v2i16__3_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -2569,21 +2569,21 @@ define void @s_shuffle_v3i16_v2i16__3_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v2i16__3_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v2i16__3_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -2639,18 +2639,18 @@ define void @s_shuffle_v3i16_v2i16__3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v2i16__3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v2i16__3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -2724,18 +2724,18 @@ define void @s_shuffle_v3i16_v2i16__1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v2i16__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v2i16__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <3 x i32> %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> @@ -2795,21 +2795,21 @@ define void @s_shuffle_v3i16_v2i16__3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v2i16__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v2i16__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -2849,20 +2849,20 @@ define void @s_shuffle_v3i16_v2i16__3_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v2i16__3_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v2i16__3_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -2902,20 +2902,20 @@ define void @s_shuffle_v3i16_v2i16__3_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v2i16__3_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v2i16__3_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -2957,21 +2957,21 @@ define void @s_shuffle_v3i16_v2i16__3_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v2i16__3_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v2i16__3_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -3045,18 +3045,18 @@ define void @s_shuffle_v3i16_v2i16__1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v2i16__1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v2i16__1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <3 x i32> %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> @@ -3116,21 +3116,21 @@ define void @s_shuffle_v3i16_v2i16__3_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v2i16__3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v2i16__3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -3172,21 +3172,21 @@ define void @s_shuffle_v3i16_v2i16__3_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v2i16__3_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v2i16__3_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -3230,22 +3230,22 @@ define void @s_shuffle_v3i16_v2i16__3_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v2i16__3_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v2i16__3_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -3289,22 +3289,22 @@ define void @s_shuffle_v3i16_v2i16__3_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v2i16__3_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v2i16__3_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -3351,17 +3351,17 @@ define void @s_shuffle_v3i16_v2i16__0_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v2i16__0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v2i16__0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <3 x i32> %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> @@ -3394,17 +3394,17 @@ define void @s_shuffle_v3i16_v2i16__1_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v2i16__1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v2i16__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <3 x i32> %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> @@ -3454,18 +3454,18 @@ define void @s_shuffle_v3i16_v2i16__3_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v2i16__3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v2i16__3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -3527,21 +3527,21 @@ define void @s_shuffle_v3i16_v2i16__3_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v2i16__3_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s9, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v2i16__3_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s9, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -3581,20 +3581,20 @@ define void @s_shuffle_v3i16_v2i16__3_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v2i16__3_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v2i16__3_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -3656,21 +3656,21 @@ define void @s_shuffle_v3i16_v2i16__0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v2i16__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v2i16__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -3712,21 +3712,21 @@ define void @s_shuffle_v3i16_v2i16__1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v2i16__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v2i16__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -3782,18 +3782,18 @@ define void @s_shuffle_v3i16_v2i16__3_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v2i16__3_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v2i16__3_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -3835,21 +3835,21 @@ define void @s_shuffle_v3i16_v2i16__3_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v2i16__3_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v2i16__3_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -3891,21 +3891,21 @@ define void @s_shuffle_v3i16_v2i16__3_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v2i16__3_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v2i16__3_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> @@ -3941,18 +3941,18 @@ define void @s_shuffle_v3i16_v2i16__3_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v2i16__3_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v2i16__3_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <3 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i16.v3i16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i16.v3i16.ll index 29cb877bffb60..0cf6da3659dde 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i16.v3i16.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i16.v3i16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v3i16_v3i16__u_u_u(ptr addrspace(1) inreg %ptr) { @@ -41,17 +41,17 @@ define void @v_shuffle_v3i16_v3i16__0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> @@ -84,17 +84,17 @@ define void @v_shuffle_v3i16_v3i16__1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> @@ -125,16 +125,16 @@ define void @v_shuffle_v3i16_v3i16__2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> @@ -179,17 +179,17 @@ define void @v_shuffle_v3i16_v3i16__4_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -222,16 +222,16 @@ define void @v_shuffle_v3i16_v3i16__5_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -274,22 +274,22 @@ define void @v_shuffle_v3i16_v3i16__5_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -332,22 +332,22 @@ define void @v_shuffle_v3i16_v3i16__5_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -390,22 +390,22 @@ define void @v_shuffle_v3i16_v3i16__5_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -442,18 +442,18 @@ define void @v_shuffle_v3i16_v3i16__5_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -490,18 +490,18 @@ define void @v_shuffle_v3i16_v3i16__5_4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v0 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -538,18 +538,18 @@ define void @v_shuffle_v3i16_v3i16__5_5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -594,23 +594,23 @@ define void @v_shuffle_v3i16_v3i16__5_5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -655,23 +655,23 @@ define void @v_shuffle_v3i16_v3i16__5_5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -716,23 +716,23 @@ define void @v_shuffle_v3i16_v3i16__5_5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -771,19 +771,19 @@ define void @v_shuffle_v3i16_v3i16__5_5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -822,19 +822,19 @@ define void @v_shuffle_v3i16_v3i16__5_5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -873,19 +873,19 @@ define void @v_shuffle_v3i16_v3i16__5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -922,19 +922,19 @@ define void @v_shuffle_v3i16_v3i16__u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> @@ -971,19 +971,19 @@ define void @v_shuffle_v3i16_v3i16__0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> zeroinitializer @@ -1018,19 +1018,19 @@ define void @v_shuffle_v3i16_v3i16__1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v0, 16 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v0, 16 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> @@ -1067,19 +1067,19 @@ define void @v_shuffle_v3i16_v3i16__2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v0, v1, s2 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> @@ -1114,19 +1114,19 @@ define void @v_shuffle_v3i16_v3i16__3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> @@ -1167,22 +1167,22 @@ define void @v_shuffle_v3i16_v3i16__4_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v2, 16 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v2, 16 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -1227,23 +1227,23 @@ define void @v_shuffle_v3i16_v3i16__5_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v3, s2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -1284,20 +1284,20 @@ define void @v_shuffle_v3i16_v3i16__5_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v3, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -1342,23 +1342,23 @@ define void @v_shuffle_v3i16_v3i16__5_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v0 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v3, v0 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -1403,23 +1403,23 @@ define void @v_shuffle_v3i16_v3i16__5_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v3, s2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v3, s2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -1464,23 +1464,23 @@ define void @v_shuffle_v3i16_v3i16__5_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v3, s2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v3, s2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -1525,23 +1525,23 @@ define void @v_shuffle_v3i16_v3i16__5_4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v3, v2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -1576,17 +1576,17 @@ define void @v_shuffle_v3i16_v3i16__u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> @@ -1619,17 +1619,17 @@ define void @v_shuffle_v3i16_v3i16__0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> @@ -1668,20 +1668,20 @@ define void @v_shuffle_v3i16_v3i16__1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> @@ -1720,20 +1720,20 @@ define void @v_shuffle_v3i16_v3i16__2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v0 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v1, v0 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> @@ -1766,17 +1766,17 @@ define void @v_shuffle_v3i16_v3i16__3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> @@ -1821,24 +1821,24 @@ define void @v_shuffle_v3i16_v3i16__4_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v2, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v2, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -1885,24 +1885,24 @@ define void @v_shuffle_v3i16_v3i16__5_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v0 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v3, v0 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -1943,20 +1943,20 @@ define void @v_shuffle_v3i16_v3i16__5_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v3, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -2001,23 +2001,23 @@ define void @v_shuffle_v3i16_v3i16__5_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 -; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v3, s2 +; GFX942-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -2062,23 +2062,23 @@ define void @v_shuffle_v3i16_v3i16__5_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v3, s2 -; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v3, s2 +; GFX942-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -2123,23 +2123,23 @@ define void @v_shuffle_v3i16_v3i16__5_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v3, s2 -; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v3, s2 +; GFX942-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -2184,23 +2184,23 @@ define void @v_shuffle_v3i16_v3i16__5_4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v2 -; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v3, v2 +; GFX942-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -2237,19 +2237,19 @@ define void @v_shuffle_v3i16_v3i16__u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> @@ -2286,19 +2286,19 @@ define void @v_shuffle_v3i16_v3i16__0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> @@ -2333,19 +2333,19 @@ define void @v_shuffle_v3i16_v3i16__1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 16 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> @@ -2382,19 +2382,19 @@ define void @v_shuffle_v3i16_v3i16__2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> @@ -2429,19 +2429,19 @@ define void @v_shuffle_v3i16_v3i16__3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> @@ -2482,22 +2482,22 @@ define void @v_shuffle_v3i16_v3i16__4_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v2, 16 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v2, 16 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -2542,23 +2542,23 @@ define void @v_shuffle_v3i16_v3i16__5_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -2599,20 +2599,20 @@ define void @v_shuffle_v3i16_v3i16__5_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v3, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -2657,23 +2657,23 @@ define void @v_shuffle_v3i16_v3i16__5_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -2718,23 +2718,23 @@ define void @v_shuffle_v3i16_v3i16__5_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -2779,23 +2779,23 @@ define void @v_shuffle_v3i16_v3i16__5_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v3, s2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -2840,23 +2840,23 @@ define void @v_shuffle_v3i16_v3i16__5_4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -2903,17 +2903,17 @@ define void @v_shuffle_v3i16_v3i16__0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> @@ -2946,17 +2946,17 @@ define void @v_shuffle_v3i16_v3i16__1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> @@ -2987,16 +2987,16 @@ define void @v_shuffle_v3i16_v3i16__2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> @@ -3043,19 +3043,19 @@ define void @v_shuffle_v3i16_v3i16__4_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v0, 16 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v0, 16 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -3094,19 +3094,19 @@ define void @v_shuffle_v3i16_v3i16__5_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v0, v1, s2 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -3141,17 +3141,17 @@ define void @v_shuffle_v3i16_v3i16__5_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -3196,22 +3196,22 @@ define void @v_shuffle_v3i16_v3i16__5_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_short v4, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -3256,22 +3256,22 @@ define void @v_shuffle_v3i16_v3i16__5_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: global_store_short v4, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -3316,22 +3316,22 @@ define void @v_shuffle_v3i16_v3i16__5_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_short v4, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -3370,19 +3370,19 @@ define void @v_shuffle_v3i16_v3i16__5_4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_bfi_b32 v1, s2, v1, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -3417,17 +3417,17 @@ define void @v_shuffle_v3i16_v3i16__u_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -3474,24 +3474,24 @@ define void @v_shuffle_v3i16_v3i16__0_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -3538,24 +3538,24 @@ define void @v_shuffle_v3i16_v3i16__1_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -3602,24 +3602,24 @@ define void @v_shuffle_v3i16_v3i16__2_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -3654,17 +3654,17 @@ define void @v_shuffle_v3i16_v3i16__3_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -3705,20 +3705,20 @@ define void @v_shuffle_v3i16_v3i16__4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -3759,20 +3759,20 @@ define void @v_shuffle_v3i16_v3i16__5_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v0 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v1, v0 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -3807,17 +3807,17 @@ define void @v_shuffle_v3i16_v3i16__5_u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -3862,22 +3862,22 @@ define void @v_shuffle_v3i16_v3i16__5_0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -3922,22 +3922,22 @@ define void @v_shuffle_v3i16_v3i16__5_1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -3982,22 +3982,22 @@ define void @v_shuffle_v3i16_v3i16__5_2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -4036,19 +4036,19 @@ define void @v_shuffle_v3i16_v3i16__5_3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 -; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v0, v1, s2 +; GFX942-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -4085,19 +4085,19 @@ define void @v_shuffle_v3i16_v3i16__u_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -4142,22 +4142,22 @@ define void @v_shuffle_v3i16_v3i16__0_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -4200,22 +4200,22 @@ define void @v_shuffle_v3i16_v3i16__1_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v0, 16 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -4260,22 +4260,22 @@ define void @v_shuffle_v3i16_v3i16__2_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -4314,19 +4314,19 @@ define void @v_shuffle_v3i16_v3i16__3_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -4363,19 +4363,19 @@ define void @v_shuffle_v3i16_v3i16__4_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 16 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -4410,17 +4410,17 @@ define void @v_shuffle_v3i16_v3i16__5_u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -4465,22 +4465,22 @@ define void @v_shuffle_v3i16_v3i16__5_0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -4525,22 +4525,22 @@ define void @v_shuffle_v3i16_v3i16__5_1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -4585,22 +4585,22 @@ define void @v_shuffle_v3i16_v3i16__5_2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -4639,19 +4639,19 @@ define void @v_shuffle_v3i16_v3i16__5_3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -4690,19 +4690,19 @@ define void @v_shuffle_v3i16_v3i16__5_4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v3i16__5_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v0 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -4751,17 +4751,17 @@ define void @s_shuffle_v3i16_v3i16__0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> @@ -4795,17 +4795,17 @@ define void @s_shuffle_v3i16_v3i16__1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> @@ -4839,17 +4839,17 @@ define void @s_shuffle_v3i16_v3i16__2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> @@ -4899,17 +4899,17 @@ define void @s_shuffle_v3i16_v3i16__4_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -4945,17 +4945,17 @@ define void @s_shuffle_v3i16_v3i16__5_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -4997,20 +4997,20 @@ define void @s_shuffle_v3i16_v3i16__5_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -5052,20 +5052,20 @@ define void @s_shuffle_v3i16_v3i16__5_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -5107,20 +5107,20 @@ define void @s_shuffle_v3i16_v3i16__5_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -5156,17 +5156,17 @@ define void @s_shuffle_v3i16_v3i16__5_3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -5202,17 +5202,17 @@ define void @s_shuffle_v3i16_v3i16__5_4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -5248,17 +5248,17 @@ define void @s_shuffle_v3i16_v3i16__5_5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -5302,21 +5302,21 @@ define void @s_shuffle_v3i16_v3i16__5_5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -5360,21 +5360,21 @@ define void @s_shuffle_v3i16_v3i16__5_5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -5416,20 +5416,20 @@ define void @s_shuffle_v3i16_v3i16__5_5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -5467,18 +5467,18 @@ define void @s_shuffle_v3i16_v3i16__5_5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -5516,18 +5516,18 @@ define void @s_shuffle_v3i16_v3i16__5_5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -5587,18 +5587,18 @@ define void @s_shuffle_v3i16_v3i16__u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> @@ -5634,18 +5634,18 @@ define void @s_shuffle_v3i16_v3i16__0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> zeroinitializer @@ -5683,19 +5683,19 @@ define void @s_shuffle_v3i16_v3i16__1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> @@ -5731,18 +5731,18 @@ define void @s_shuffle_v3i16_v3i16__2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> @@ -5778,18 +5778,18 @@ define void @s_shuffle_v3i16_v3i16__3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> @@ -5833,22 +5833,22 @@ define void @s_shuffle_v3i16_v3i16__4_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -5892,21 +5892,21 @@ define void @s_shuffle_v3i16_v3i16__5_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -5950,21 +5950,21 @@ define void @s_shuffle_v3i16_v3i16__5_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -6008,21 +6008,21 @@ define void @s_shuffle_v3i16_v3i16__5_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -6066,21 +6066,21 @@ define void @s_shuffle_v3i16_v3i16__5_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -6124,21 +6124,21 @@ define void @s_shuffle_v3i16_v3i16__5_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -6182,21 +6182,21 @@ define void @s_shuffle_v3i16_v3i16__5_4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s3, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -6274,18 +6274,18 @@ define void @s_shuffle_v3i16_v3i16__1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> @@ -6321,18 +6321,18 @@ define void @s_shuffle_v3i16_v3i16__2_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> @@ -6394,21 +6394,21 @@ define void @s_shuffle_v3i16_v3i16__4_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -6452,21 +6452,21 @@ define void @s_shuffle_v3i16_v3i16__5_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -6510,21 +6510,21 @@ define void @s_shuffle_v3i16_v3i16__5_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -6568,21 +6568,21 @@ define void @s_shuffle_v3i16_v3i16__5_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -6626,21 +6626,21 @@ define void @s_shuffle_v3i16_v3i16__5_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -6684,21 +6684,21 @@ define void @s_shuffle_v3i16_v3i16__5_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s2 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s2 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -6742,21 +6742,21 @@ define void @s_shuffle_v3i16_v3i16__5_4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s2 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s3, s2 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -6834,18 +6834,18 @@ define void @s_shuffle_v3i16_v3i16__1_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> @@ -6927,21 +6927,21 @@ define void @s_shuffle_v3i16_v3i16__4_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -6983,20 +6983,20 @@ define void @s_shuffle_v3i16_v3i16__5_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -7038,20 +7038,20 @@ define void @s_shuffle_v3i16_v3i16__5_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -7093,20 +7093,20 @@ define void @s_shuffle_v3i16_v3i16__5_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -7148,20 +7148,20 @@ define void @s_shuffle_v3i16_v3i16__5_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s1, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -7203,20 +7203,20 @@ define void @s_shuffle_v3i16_v3i16__5_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -7258,20 +7258,20 @@ define void @s_shuffle_v3i16_v3i16__5_4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -7321,17 +7321,17 @@ define void @s_shuffle_v3i16_v3i16__0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> @@ -7365,17 +7365,17 @@ define void @s_shuffle_v3i16_v3i16__1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> @@ -7409,17 +7409,17 @@ define void @s_shuffle_v3i16_v3i16__2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> @@ -7473,19 +7473,19 @@ define void @s_shuffle_v3i16_v3i16__4_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -7523,18 +7523,18 @@ define void @s_shuffle_v3i16_v3i16__5_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -7572,18 +7572,18 @@ define void @s_shuffle_v3i16_v3i16__5_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -7627,21 +7627,21 @@ define void @s_shuffle_v3i16_v3i16__5_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -7685,21 +7685,21 @@ define void @s_shuffle_v3i16_v3i16__5_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -7743,21 +7743,21 @@ define void @s_shuffle_v3i16_v3i16__5_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -7795,18 +7795,18 @@ define void @s_shuffle_v3i16_v3i16__5_4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -7872,21 +7872,21 @@ define void @s_shuffle_v3i16_v3i16__0_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s2 -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s0, s2 +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -7930,21 +7930,21 @@ define void @s_shuffle_v3i16_v3i16__1_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s2 -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s2 +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -7988,21 +7988,21 @@ define void @s_shuffle_v3i16_v3i16__2_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s2 -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s1, s2 +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -8062,18 +8062,18 @@ define void @s_shuffle_v3i16_v3i16__4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -8111,18 +8111,18 @@ define void @s_shuffle_v3i16_v3i16__5_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -8160,18 +8160,18 @@ define void @s_shuffle_v3i16_v3i16__5_u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -8215,21 +8215,21 @@ define void @s_shuffle_v3i16_v3i16__5_0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -8273,21 +8273,21 @@ define void @s_shuffle_v3i16_v3i16__5_1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -8331,21 +8331,21 @@ define void @s_shuffle_v3i16_v3i16__5_2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -8383,18 +8383,18 @@ define void @s_shuffle_v3i16_v3i16__5_3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -8458,20 +8458,20 @@ define void @s_shuffle_v3i16_v3i16__0_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -8515,21 +8515,21 @@ define void @s_shuffle_v3i16_v3i16__1_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -8571,20 +8571,20 @@ define void @s_shuffle_v3i16_v3i16__2_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -8644,18 +8644,18 @@ define void @s_shuffle_v3i16_v3i16__4_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -8719,20 +8719,20 @@ define void @s_shuffle_v3i16_v3i16__5_0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -8774,20 +8774,20 @@ define void @s_shuffle_v3i16_v3i16__5_1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -8829,20 +8829,20 @@ define void @s_shuffle_v3i16_v3i16__5_2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v3i16__5_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i16.v4i16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i16.v4i16.ll index dcffb2b35f5d0..977055e546bba 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i16.v4i16.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i16.v4i16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v3i16_v4i16__u_u_u(ptr addrspace(1) inreg %ptr) { @@ -40,17 +40,17 @@ define void @v_shuffle_v3i16_v4i16__0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -82,17 +82,17 @@ define void @v_shuffle_v3i16_v4i16__1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -122,16 +122,16 @@ define void @v_shuffle_v3i16_v4i16__2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -163,17 +163,17 @@ define void @v_shuffle_v3i16_v4i16__3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -216,17 +216,17 @@ define void @v_shuffle_v3i16_v4i16__5_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -257,16 +257,16 @@ define void @v_shuffle_v3i16_v4i16__6_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__6_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__6_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -299,17 +299,17 @@ define void @v_shuffle_v3i16_v4i16__7_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -348,21 +348,21 @@ define void @v_shuffle_v3i16_v4i16__7_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -403,22 +403,22 @@ define void @v_shuffle_v3i16_v4i16__7_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -457,21 +457,21 @@ define void @v_shuffle_v3i16_v4i16__7_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v3, 16 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -512,22 +512,22 @@ define void @v_shuffle_v3i16_v4i16__7_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -560,17 +560,17 @@ define void @v_shuffle_v3i16_v4i16__7_4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v1, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -605,18 +605,18 @@ define void @v_shuffle_v3i16_v4i16__7_5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -649,17 +649,17 @@ define void @v_shuffle_v3i16_v4i16__7_6_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v1, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -694,18 +694,18 @@ define void @v_shuffle_v3i16_v4i16__7_7_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -748,23 +748,23 @@ define void @v_shuffle_v3i16_v4i16__7_7_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -807,23 +807,23 @@ define void @v_shuffle_v3i16_v4i16__7_7_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -866,23 +866,23 @@ define void @v_shuffle_v3i16_v4i16__7_7_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -925,23 +925,23 @@ define void @v_shuffle_v3i16_v4i16__7_7_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -978,19 +978,19 @@ define void @v_shuffle_v3i16_v4i16__7_7_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -1027,19 +1027,19 @@ define void @v_shuffle_v3i16_v4i16__7_7_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -1076,19 +1076,19 @@ define void @v_shuffle_v3i16_v4i16__7_7_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -1127,20 +1127,20 @@ define void @v_shuffle_v3i16_v4i16__7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -1175,19 +1175,19 @@ define void @v_shuffle_v3i16_v4i16__u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1223,19 +1223,19 @@ define void @v_shuffle_v3i16_v4i16__0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> zeroinitializer store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1269,19 +1269,19 @@ define void @v_shuffle_v3i16_v4i16__1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v0, 16 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v0, 16 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1317,19 +1317,19 @@ define void @v_shuffle_v3i16_v4i16__2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v0, v1, s2 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1363,19 +1363,19 @@ define void @v_shuffle_v3i16_v4i16__3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v1, 16 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v1, 16 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1409,19 +1409,19 @@ define void @v_shuffle_v3i16_v4i16__4_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1461,22 +1461,22 @@ define void @v_shuffle_v3i16_v4i16__5_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v2, 16 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v2, 16 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -1519,23 +1519,23 @@ define void @v_shuffle_v3i16_v4i16__6_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__6_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__6_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v3, s2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -1576,22 +1576,22 @@ define void @v_shuffle_v3i16_v4i16__7_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v3, 16 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v3, 16 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -1632,22 +1632,22 @@ define void @v_shuffle_v3i16_v4i16__7_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, s0, v3, 16 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, s0, v3, 16 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -1690,23 +1690,23 @@ define void @v_shuffle_v3i16_v4i16__7_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v3, s2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -1747,22 +1747,22 @@ define void @v_shuffle_v3i16_v4i16__7_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v3, 16 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v3, 16 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -1805,23 +1805,23 @@ define void @v_shuffle_v3i16_v4i16__7_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v3, s2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v3, s2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -1862,22 +1862,22 @@ define void @v_shuffle_v3i16_v4i16__7_4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v2, v3, 16 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v2, v3, 16 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -1920,23 +1920,23 @@ define void @v_shuffle_v3i16_v4i16__7_5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v3, s2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v3, s2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -1977,22 +1977,22 @@ define void @v_shuffle_v3i16_v4i16__7_6_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v3, v3, 16 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v3, v3, 16 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -2025,17 +2025,17 @@ define void @v_shuffle_v3i16_v4i16__u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2067,17 +2067,17 @@ define void @v_shuffle_v3i16_v4i16__0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2115,20 +2115,20 @@ define void @v_shuffle_v3i16_v4i16__1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2166,20 +2166,20 @@ define void @v_shuffle_v3i16_v4i16__2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v0 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v1, v0 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2217,20 +2217,20 @@ define void @v_shuffle_v3i16_v4i16__3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v1, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2262,17 +2262,17 @@ define void @v_shuffle_v3i16_v4i16__4_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2316,24 +2316,24 @@ define void @v_shuffle_v3i16_v4i16__5_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v2, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v2, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -2378,24 +2378,24 @@ define void @v_shuffle_v3i16_v4i16__6_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__6_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v0 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__6_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v3, v0 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -2440,24 +2440,24 @@ define void @v_shuffle_v3i16_v4i16__7_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v3, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -2498,22 +2498,22 @@ define void @v_shuffle_v3i16_v4i16__7_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, s0, v3, 16 -; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, s0, v3, 16 +; GFX942-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -2554,22 +2554,22 @@ define void @v_shuffle_v3i16_v4i16__7_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v3, 16 -; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v3, 16 +; GFX942-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -2610,22 +2610,22 @@ define void @v_shuffle_v3i16_v4i16__7_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v3, 16 -; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v3, 16 +; GFX942-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -2668,23 +2668,23 @@ define void @v_shuffle_v3i16_v4i16__7_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v3, s2 -; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v3, s2 +; GFX942-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -2725,22 +2725,22 @@ define void @v_shuffle_v3i16_v4i16__7_4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v2, v3, 16 -; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v2, v3, 16 +; GFX942-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -2783,23 +2783,23 @@ define void @v_shuffle_v3i16_v4i16__7_5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v3, s2 -; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v3, s2 +; GFX942-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -2840,22 +2840,22 @@ define void @v_shuffle_v3i16_v4i16__7_6_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v3, v3, 16 -; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v3, v3, 16 +; GFX942-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -2890,19 +2890,19 @@ define void @v_shuffle_v3i16_v4i16__u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2938,19 +2938,19 @@ define void @v_shuffle_v3i16_v4i16__0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2984,19 +2984,19 @@ define void @v_shuffle_v3i16_v4i16__1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 16 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3032,19 +3032,19 @@ define void @v_shuffle_v3i16_v4i16__2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3078,19 +3078,19 @@ define void @v_shuffle_v3i16_v4i16__3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v1, 16 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3124,19 +3124,19 @@ define void @v_shuffle_v3i16_v4i16__4_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3176,22 +3176,22 @@ define void @v_shuffle_v3i16_v4i16__5_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v2, 16 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v2, 16 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -3234,23 +3234,23 @@ define void @v_shuffle_v3i16_v4i16__6_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__6_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__6_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -3291,22 +3291,22 @@ define void @v_shuffle_v3i16_v4i16__7_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v3, 16 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -3347,22 +3347,22 @@ define void @v_shuffle_v3i16_v4i16__7_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v3, 16 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -3403,22 +3403,22 @@ define void @v_shuffle_v3i16_v4i16__7_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -3461,23 +3461,23 @@ define void @v_shuffle_v3i16_v4i16__7_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -3520,23 +3520,23 @@ define void @v_shuffle_v3i16_v4i16__7_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -3577,22 +3577,22 @@ define void @v_shuffle_v3i16_v4i16__7_4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v3, 16 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v3, 16 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -3635,23 +3635,23 @@ define void @v_shuffle_v3i16_v4i16__7_5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v3, s2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -3692,22 +3692,22 @@ define void @v_shuffle_v3i16_v4i16__7_6_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v3, 16 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -3740,17 +3740,17 @@ define void @v_shuffle_v3i16_v4i16__u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short_d16_hi v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short_d16_hi v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3788,20 +3788,20 @@ define void @v_shuffle_v3i16_v4i16__0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v1 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3839,20 +3839,20 @@ define void @v_shuffle_v3i16_v4i16__1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3884,17 +3884,17 @@ define void @v_shuffle_v3i16_v4i16__2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short_d16_hi v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short_d16_hi v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3932,20 +3932,20 @@ define void @v_shuffle_v3i16_v4i16__3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3977,17 +3977,17 @@ define void @v_shuffle_v3i16_v4i16__4_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short_d16_hi v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short_d16_hi v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4031,24 +4031,24 @@ define void @v_shuffle_v3i16_v4i16__5_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v2, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v2, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -4093,24 +4093,24 @@ define void @v_shuffle_v3i16_v4i16__6_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__6_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v1 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__6_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v1 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -4155,24 +4155,24 @@ define void @v_shuffle_v3i16_v4i16__7_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -4213,22 +4213,22 @@ define void @v_shuffle_v3i16_v4i16__7_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16 -; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v3, 16 +; GFX942-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -4269,22 +4269,22 @@ define void @v_shuffle_v3i16_v4i16__7_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -4327,23 +4327,23 @@ define void @v_shuffle_v3i16_v4i16__7_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -4384,22 +4384,22 @@ define void @v_shuffle_v3i16_v4i16__7_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 -; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v3, 16 +; GFX942-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -4440,22 +4440,22 @@ define void @v_shuffle_v3i16_v4i16__7_4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v3, 16 -; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v3, 16 +; GFX942-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -4498,23 +4498,23 @@ define void @v_shuffle_v3i16_v4i16__7_5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 -; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v3, s2 +; GFX942-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -4555,22 +4555,22 @@ define void @v_shuffle_v3i16_v4i16__7_6_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 -; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v3, 16 +; GFX942-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -4614,17 +4614,17 @@ define void @v_shuffle_v3i16_v4i16__0_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4656,17 +4656,17 @@ define void @v_shuffle_v3i16_v4i16__1_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4696,16 +4696,16 @@ define void @v_shuffle_v3i16_v4i16__2_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4737,17 +4737,17 @@ define void @v_shuffle_v3i16_v4i16__3_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4792,19 +4792,19 @@ define void @v_shuffle_v3i16_v4i16__5_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v0, 16 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v0, 16 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -4841,19 +4841,19 @@ define void @v_shuffle_v3i16_v4i16__6_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__6_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__6_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v0, v1, s2 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -4888,19 +4888,19 @@ define void @v_shuffle_v3i16_v4i16__7_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v1, 16 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v1, 16 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -4935,19 +4935,19 @@ define void @v_shuffle_v3i16_v4i16__7_u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, s0, v1, 16 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -4988,22 +4988,22 @@ define void @v_shuffle_v3i16_v4i16__7_0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_short v4, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -5046,22 +5046,22 @@ define void @v_shuffle_v3i16_v4i16__7_1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_short v4, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -5102,22 +5102,22 @@ define void @v_shuffle_v3i16_v4i16__7_2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 -; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v3, 16 +; GFX942-NEXT: global_store_short v4, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -5160,22 +5160,22 @@ define void @v_shuffle_v3i16_v4i16__7_3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_short v4, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -5212,19 +5212,19 @@ define void @v_shuffle_v3i16_v4i16__7_5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v0, v1, s2 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -5259,19 +5259,19 @@ define void @v_shuffle_v3i16_v4i16__7_6_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v1, 16 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -5304,17 +5304,17 @@ define void @v_shuffle_v3i16_v4i16__u_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -5359,24 +5359,24 @@ define void @v_shuffle_v3i16_v4i16__0_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -5421,24 +5421,24 @@ define void @v_shuffle_v3i16_v4i16__1_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v2 -; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v0, s2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX942-NEXT: global_store_short v4, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -5483,24 +5483,24 @@ define void @v_shuffle_v3i16_v4i16__2_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v2 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -5545,24 +5545,24 @@ define void @v_shuffle_v3i16_v4i16__3_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v1, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v2 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v1, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -5595,17 +5595,17 @@ define void @v_shuffle_v3i16_v4i16__4_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -5644,20 +5644,20 @@ define void @v_shuffle_v3i16_v4i16__5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -5696,20 +5696,20 @@ define void @v_shuffle_v3i16_v4i16__6_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__6_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v0 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__6_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v1, v0 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -5748,20 +5748,20 @@ define void @v_shuffle_v3i16_v4i16__7_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v1, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -5796,19 +5796,19 @@ define void @v_shuffle_v3i16_v4i16__7_u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16 -; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, s0, v1, 16 +; GFX942-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -5849,22 +5849,22 @@ define void @v_shuffle_v3i16_v4i16__7_0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -5907,22 +5907,22 @@ define void @v_shuffle_v3i16_v4i16__7_1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -5963,22 +5963,22 @@ define void @v_shuffle_v3i16_v4i16__7_2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 -; GFX940-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v3, 16 +; GFX942-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -6021,22 +6021,22 @@ define void @v_shuffle_v3i16_v4i16__7_3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -6071,19 +6071,19 @@ define void @v_shuffle_v3i16_v4i16__7_4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v1, 16 -; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v1, 16 +; GFX942-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -6118,19 +6118,19 @@ define void @v_shuffle_v3i16_v4i16__7_6_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16 -; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v1, 16 +; GFX942-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -6165,19 +6165,19 @@ define void @v_shuffle_v3i16_v4i16__u_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__u_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__u_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -6220,22 +6220,22 @@ define void @v_shuffle_v3i16_v4i16__0_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__0_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__0_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -6276,22 +6276,22 @@ define void @v_shuffle_v3i16_v4i16__1_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__1_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__1_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v0, 16 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -6334,22 +6334,22 @@ define void @v_shuffle_v3i16_v4i16__2_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__2_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__2_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -6390,22 +6390,22 @@ define void @v_shuffle_v3i16_v4i16__3_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__3_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v1, 16 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__3_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v1, 16 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -6442,19 +6442,19 @@ define void @v_shuffle_v3i16_v4i16__4_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__4_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__4_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -6489,19 +6489,19 @@ define void @v_shuffle_v3i16_v4i16__5_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__5_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__5_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v0, 16 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -6538,19 +6538,19 @@ define void @v_shuffle_v3i16_v4i16__6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -6585,19 +6585,19 @@ define void @v_shuffle_v3i16_v4i16__7_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v1, 16 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -6632,19 +6632,19 @@ define void @v_shuffle_v3i16_v4i16__7_u_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -6685,22 +6685,22 @@ define void @v_shuffle_v3i16_v4i16__7_0_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -6743,22 +6743,22 @@ define void @v_shuffle_v3i16_v4i16__7_1_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -6799,22 +6799,22 @@ define void @v_shuffle_v3i16_v4i16__7_2_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v3, 16 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -6857,22 +6857,22 @@ define void @v_shuffle_v3i16_v4i16__7_3_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_short v4, v3, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -6907,19 +6907,19 @@ define void @v_shuffle_v3i16_v4i16__7_4_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v1, 16 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -6956,19 +6956,19 @@ define void @v_shuffle_v3i16_v4i16__7_5_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -7001,17 +7001,17 @@ define void @v_shuffle_v3i16_v4i16__u_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__u_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short_d16_hi v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__u_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short_d16_hi v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -7056,24 +7056,24 @@ define void @v_shuffle_v3i16_v4i16__0_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__0_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v3 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__0_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v3 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -7118,24 +7118,24 @@ define void @v_shuffle_v3i16_v4i16__1_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__1_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__1_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -7180,24 +7180,24 @@ define void @v_shuffle_v3i16_v4i16__2_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__2_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v3 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__2_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v3 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v3 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -7242,24 +7242,24 @@ define void @v_shuffle_v3i16_v4i16__3_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__3_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__3_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -7298,20 +7298,20 @@ define void @v_shuffle_v3i16_v4i16__4_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__4_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__4_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v1 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -7350,20 +7350,20 @@ define void @v_shuffle_v3i16_v4i16__5_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__5_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__5_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -7396,17 +7396,17 @@ define void @v_shuffle_v3i16_v4i16__6_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__6_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_short_d16_hi v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__6_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_short_d16_hi v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -7443,19 +7443,19 @@ define void @v_shuffle_v3i16_v4i16__7_u_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v1 -; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16 -; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_lshrrev_b32_e32 v0, 16, v1 +; GFX942-NEXT: v_alignbit_b32 v1, s0, v1, 16 +; GFX942-NEXT: global_store_dword v2, v1, s[0:1] +; GFX942-NEXT: global_store_short v2, v0, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -7498,23 +7498,23 @@ define void @v_shuffle_v3i16_v4i16__7_0_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -7559,24 +7559,24 @@ define void @v_shuffle_v3i16_v4i16__7_1_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -7619,23 +7619,23 @@ define void @v_shuffle_v3i16_v4i16__7_2_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v3, 16 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v3 +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -7680,24 +7680,24 @@ define void @v_shuffle_v3i16_v4i16__7_3_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 -; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v3 +; GFX942-NEXT: global_store_dword v4, v0, s[0:1] +; GFX942-NEXT: global_store_short v4, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -7734,19 +7734,19 @@ define void @v_shuffle_v3i16_v4i16__7_4_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16 -; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v3, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v1, 16 +; GFX942-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: global_store_short v2, v3, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -7785,20 +7785,20 @@ define void @v_shuffle_v3i16_v4i16__7_5_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -7835,19 +7835,19 @@ define void @v_shuffle_v3i16_v4i16__7_6_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 -; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 -; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i16_v4i16__7_6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v1, 16 +; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_short v2, v1, s[0:1] offset:4 +; GFX942-NEXT: global_store_dword v2, v0, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -7893,17 +7893,17 @@ define void @s_shuffle_v3i16_v4i16__0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> @@ -7936,17 +7936,17 @@ define void @s_shuffle_v3i16_v4i16__1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> @@ -7979,17 +7979,17 @@ define void @s_shuffle_v3i16_v4i16__2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> @@ -8022,17 +8022,17 @@ define void @s_shuffle_v3i16_v4i16__3_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> @@ -8080,17 +8080,17 @@ define void @s_shuffle_v3i16_v4i16__5_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -8124,17 +8124,17 @@ define void @s_shuffle_v3i16_v4i16__6_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__6_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__6_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -8168,17 +8168,17 @@ define void @s_shuffle_v3i16_v4i16__7_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -8220,21 +8220,21 @@ define void @s_shuffle_v3i16_v4i16__7_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -8274,20 +8274,20 @@ define void @s_shuffle_v3i16_v4i16__7_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -8329,21 +8329,21 @@ define void @s_shuffle_v3i16_v4i16__7_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -8383,20 +8383,20 @@ define void @s_shuffle_v3i16_v4i16__7_3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -8432,18 +8432,18 @@ define void @s_shuffle_v3i16_v4i16__7_4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -8477,17 +8477,17 @@ define void @s_shuffle_v3i16_v4i16__7_5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -8523,18 +8523,18 @@ define void @s_shuffle_v3i16_v4i16__7_6_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -8568,17 +8568,17 @@ define void @s_shuffle_v3i16_v4i16__7_7_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -8620,21 +8620,21 @@ define void @s_shuffle_v3i16_v4i16__7_7_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -8676,21 +8676,21 @@ define void @s_shuffle_v3i16_v4i16__7_7_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -8730,20 +8730,20 @@ define void @s_shuffle_v3i16_v4i16__7_7_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -8785,21 +8785,21 @@ define void @s_shuffle_v3i16_v4i16__7_7_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -8835,18 +8835,18 @@ define void @s_shuffle_v3i16_v4i16__7_7_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -8882,18 +8882,18 @@ define void @s_shuffle_v3i16_v4i16__7_7_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -8949,18 +8949,18 @@ define void @s_shuffle_v3i16_v4i16__7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -8996,18 +8996,18 @@ define void @s_shuffle_v3i16_v4i16__u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> @@ -9042,18 +9042,18 @@ define void @s_shuffle_v3i16_v4i16__0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> zeroinitializer %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> @@ -9090,19 +9090,19 @@ define void @s_shuffle_v3i16_v4i16__1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> @@ -9137,18 +9137,18 @@ define void @s_shuffle_v3i16_v4i16__2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> @@ -9185,19 +9185,19 @@ define void @s_shuffle_v3i16_v4i16__3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> @@ -9232,18 +9232,18 @@ define void @s_shuffle_v3i16_v4i16__4_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> @@ -9286,22 +9286,22 @@ define void @s_shuffle_v3i16_v4i16__5_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -9343,21 +9343,21 @@ define void @s_shuffle_v3i16_v4i16__6_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__6_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__6_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -9401,22 +9401,22 @@ define void @s_shuffle_v3i16_v4i16__7_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -9458,21 +9458,21 @@ define void @s_shuffle_v3i16_v4i16__7_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s3, 16 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s3, 16 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -9514,21 +9514,21 @@ define void @s_shuffle_v3i16_v4i16__7_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -9572,22 +9572,22 @@ define void @s_shuffle_v3i16_v4i16__7_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -9629,21 +9629,21 @@ define void @s_shuffle_v3i16_v4i16__7_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -9687,22 +9687,22 @@ define void @s_shuffle_v3i16_v4i16__7_4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -9744,21 +9744,21 @@ define void @s_shuffle_v3i16_v4i16__7_5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -9802,22 +9802,22 @@ define void @s_shuffle_v3i16_v4i16__7_6_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -9891,18 +9891,18 @@ define void @s_shuffle_v3i16_v4i16__1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> @@ -9937,18 +9937,18 @@ define void @s_shuffle_v3i16_v4i16__2_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> @@ -9983,18 +9983,18 @@ define void @s_shuffle_v3i16_v4i16__3_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> @@ -10054,21 +10054,21 @@ define void @s_shuffle_v3i16_v4i16__5_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -10110,21 +10110,21 @@ define void @s_shuffle_v3i16_v4i16__6_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__6_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__6_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -10166,21 +10166,21 @@ define void @s_shuffle_v3i16_v4i16__7_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -10222,21 +10222,21 @@ define void @s_shuffle_v3i16_v4i16__7_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_lshr_b32 s8, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_lshr_b32 s8, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -10280,22 +10280,22 @@ define void @s_shuffle_v3i16_v4i16__7_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -10339,22 +10339,22 @@ define void @s_shuffle_v3i16_v4i16__7_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -10396,21 +10396,21 @@ define void @s_shuffle_v3i16_v4i16__7_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -10454,22 +10454,22 @@ define void @s_shuffle_v3i16_v4i16__7_4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s2 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -10511,21 +10511,21 @@ define void @s_shuffle_v3i16_v4i16__7_5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s2 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s2 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -10569,22 +10569,22 @@ define void @s_shuffle_v3i16_v4i16__7_6_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s3 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -10658,18 +10658,18 @@ define void @s_shuffle_v3i16_v4i16__1_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> @@ -10723,18 +10723,18 @@ define void @s_shuffle_v3i16_v4i16__3_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> @@ -10794,21 +10794,21 @@ define void @s_shuffle_v3i16_v4i16__5_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -10848,20 +10848,20 @@ define void @s_shuffle_v3i16_v4i16__6_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__6_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__6_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -10903,21 +10903,21 @@ define void @s_shuffle_v3i16_v4i16__7_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -10957,20 +10957,20 @@ define void @s_shuffle_v3i16_v4i16__7_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -11012,21 +11012,21 @@ define void @s_shuffle_v3i16_v4i16__7_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -11066,20 +11066,20 @@ define void @s_shuffle_v3i16_v4i16__7_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -11119,20 +11119,20 @@ define void @s_shuffle_v3i16_v4i16__7_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -11174,21 +11174,21 @@ define void @s_shuffle_v3i16_v4i16__7_4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -11228,20 +11228,20 @@ define void @s_shuffle_v3i16_v4i16__7_5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -11283,21 +11283,21 @@ define void @s_shuffle_v3i16_v4i16__7_6_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -11333,18 +11333,18 @@ define void @s_shuffle_v3i16_v4i16__u_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> @@ -11379,18 +11379,18 @@ define void @s_shuffle_v3i16_v4i16__0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> @@ -11425,18 +11425,18 @@ define void @s_shuffle_v3i16_v4i16__1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> @@ -11471,18 +11471,18 @@ define void @s_shuffle_v3i16_v4i16__2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> @@ -11517,18 +11517,18 @@ define void @s_shuffle_v3i16_v4i16__3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> @@ -11563,18 +11563,18 @@ define void @s_shuffle_v3i16_v4i16__4_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> @@ -11615,21 +11615,21 @@ define void @s_shuffle_v3i16_v4i16__5_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -11671,21 +11671,21 @@ define void @s_shuffle_v3i16_v4i16__6_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__6_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__6_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -11727,21 +11727,21 @@ define void @s_shuffle_v3i16_v4i16__7_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -11783,21 +11783,21 @@ define void @s_shuffle_v3i16_v4i16__7_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_lshr_b32 s8, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_lshr_b32 s8, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -11841,22 +11841,22 @@ define void @s_shuffle_v3i16_v4i16__7_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -11898,21 +11898,21 @@ define void @s_shuffle_v3i16_v4i16__7_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -11956,22 +11956,22 @@ define void @s_shuffle_v3i16_v4i16__7_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -12015,22 +12015,22 @@ define void @s_shuffle_v3i16_v4i16__7_4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -12072,21 +12072,21 @@ define void @s_shuffle_v3i16_v4i16__7_5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s2 -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s2 +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -12130,22 +12130,22 @@ define void @s_shuffle_v3i16_v4i16__7_6_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -12192,17 +12192,17 @@ define void @s_shuffle_v3i16_v4i16__0_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> @@ -12235,17 +12235,17 @@ define void @s_shuffle_v3i16_v4i16__1_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> @@ -12278,17 +12278,17 @@ define void @s_shuffle_v3i16_v4i16__2_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> @@ -12321,17 +12321,17 @@ define void @s_shuffle_v3i16_v4i16__3_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> @@ -12383,19 +12383,19 @@ define void @s_shuffle_v3i16_v4i16__5_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -12431,18 +12431,18 @@ define void @s_shuffle_v3i16_v4i16__6_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__6_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__6_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -12480,19 +12480,19 @@ define void @s_shuffle_v3i16_v4i16__7_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -12528,18 +12528,18 @@ define void @s_shuffle_v3i16_v4i16__7_u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -12583,22 +12583,22 @@ define void @s_shuffle_v3i16_v4i16__7_0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -12640,21 +12640,21 @@ define void @s_shuffle_v3i16_v4i16__7_1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -12698,22 +12698,22 @@ define void @s_shuffle_v3i16_v4i16__7_2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -12755,21 +12755,21 @@ define void @s_shuffle_v3i16_v4i16__7_3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -12805,18 +12805,18 @@ define void @s_shuffle_v3i16_v4i16__7_5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -12854,19 +12854,19 @@ define void @s_shuffle_v3i16_v4i16__7_6_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -12928,21 +12928,21 @@ define void @s_shuffle_v3i16_v4i16__0_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s2 -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s0, s2 +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -12984,21 +12984,21 @@ define void @s_shuffle_v3i16_v4i16__1_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s2 -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s2 +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -13040,21 +13040,21 @@ define void @s_shuffle_v3i16_v4i16__2_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s2 -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s1, s2 +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -13096,21 +13096,21 @@ define void @s_shuffle_v3i16_v4i16__3_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s2 -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s2 +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -13166,18 +13166,18 @@ define void @s_shuffle_v3i16_v4i16__5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -13213,18 +13213,18 @@ define void @s_shuffle_v3i16_v4i16__6_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__6_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__6_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -13260,18 +13260,18 @@ define void @s_shuffle_v3i16_v4i16__7_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -13307,18 +13307,18 @@ define void @s_shuffle_v3i16_v4i16__7_u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -13362,22 +13362,22 @@ define void @s_shuffle_v3i16_v4i16__7_0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -13419,21 +13419,21 @@ define void @s_shuffle_v3i16_v4i16__7_1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -13477,22 +13477,22 @@ define void @s_shuffle_v3i16_v4i16__7_2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -13534,21 +13534,21 @@ define void @s_shuffle_v3i16_v4i16__7_3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_lshr_b32 s9, s2, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_lshr_b32 s9, s2, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -13586,19 +13586,19 @@ define void @s_shuffle_v3i16_v4i16__7_4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -13636,19 +13636,19 @@ define void @s_shuffle_v3i16_v4i16__7_6_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -13708,20 +13708,20 @@ define void @s_shuffle_v3i16_v4i16__0_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__0_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__0_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -13763,21 +13763,21 @@ define void @s_shuffle_v3i16_v4i16__1_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__1_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__1_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -13817,20 +13817,20 @@ define void @s_shuffle_v3i16_v4i16__2_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__2_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__2_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -13872,21 +13872,21 @@ define void @s_shuffle_v3i16_v4i16__3_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__3_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__3_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -13942,18 +13942,18 @@ define void @s_shuffle_v3i16_v4i16__5_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__5_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__5_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -14009,18 +14009,18 @@ define void @s_shuffle_v3i16_v4i16__7_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -14082,21 +14082,21 @@ define void @s_shuffle_v3i16_v4i16__7_0_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -14136,20 +14136,20 @@ define void @s_shuffle_v3i16_v4i16__7_1_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -14191,21 +14191,21 @@ define void @s_shuffle_v3i16_v4i16__7_2_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -14245,20 +14245,20 @@ define void @s_shuffle_v3i16_v4i16__7_3_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s9, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s9, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -14294,18 +14294,18 @@ define void @s_shuffle_v3i16_v4i16__7_4_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -14361,18 +14361,18 @@ define void @s_shuffle_v3i16_v4i16__u_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__u_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__u_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -14414,21 +14414,21 @@ define void @s_shuffle_v3i16_v4i16__0_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__0_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s3 -; GFX940-NEXT: s_lshr_b32 s9, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__0_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s0, s3 +; GFX942-NEXT: s_lshr_b32 s9, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -14470,21 +14470,21 @@ define void @s_shuffle_v3i16_v4i16__1_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__1_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s3 -; GFX940-NEXT: s_lshr_b32 s9, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__1_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s3 +; GFX942-NEXT: s_lshr_b32 s9, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -14526,21 +14526,21 @@ define void @s_shuffle_v3i16_v4i16__2_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__2_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s3 -; GFX940-NEXT: s_lshr_b32 s9, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__2_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s1, s3 +; GFX942-NEXT: s_lshr_b32 s9, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -14582,21 +14582,21 @@ define void @s_shuffle_v3i16_v4i16__3_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__3_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s3 -; GFX940-NEXT: s_lshr_b32 s9, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__3_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s3 +; GFX942-NEXT: s_lshr_b32 s9, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -14632,18 +14632,18 @@ define void @s_shuffle_v3i16_v4i16__4_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__4_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__4_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -14679,18 +14679,18 @@ define void @s_shuffle_v3i16_v4i16__5_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__5_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__5_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -14726,18 +14726,18 @@ define void @s_shuffle_v3i16_v4i16__6_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__6_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__6_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -14773,18 +14773,18 @@ define void @s_shuffle_v3i16_v4i16__7_u_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -14826,21 +14826,21 @@ define void @s_shuffle_v3i16_v4i16__7_0_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -14882,21 +14882,21 @@ define void @s_shuffle_v3i16_v4i16__7_1_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_lshr_b32 s9, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_lshr_b32 s9, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -14938,21 +14938,21 @@ define void @s_shuffle_v3i16_v4i16__7_2_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -14994,21 +14994,21 @@ define void @s_shuffle_v3i16_v4i16__7_3_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_lshr_b32 s9, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_lshr_b32 s9, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -15044,18 +15044,18 @@ define void @s_shuffle_v3i16_v4i16__7_4_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -15091,18 +15091,18 @@ define void @s_shuffle_v3i16_v4i16__7_5_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> @@ -15138,18 +15138,18 @@ define void @s_shuffle_v3i16_v4i16__7_6_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i16_v4i16__7_6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v2i32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v2i32.ll index 4bb6ed021f0ca..ea4fac3b1d2b1 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v2i32.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v2i32.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v3i32_v2i32__u_u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v3i32_v2i32__0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -79,17 +79,17 @@ define void @v_shuffle_v3i32_v2i32__1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -132,17 +132,17 @@ define void @v_shuffle_v3i32_v2i32__3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -182,21 +182,21 @@ define void @v_shuffle_v3i32_v2i32__3_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__3_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__3_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -235,21 +235,21 @@ define void @v_shuffle_v3i32_v2i32__3_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__3_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__3_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -283,19 +283,19 @@ define void @v_shuffle_v3i32_v2i32__3_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__3_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__3_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -328,17 +328,17 @@ define void @v_shuffle_v3i32_v2i32__3_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__3_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__3_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -377,20 +377,20 @@ define void @v_shuffle_v3i32_v2i32__3_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__3_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__3_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -430,21 +430,21 @@ define void @v_shuffle_v3i32_v2i32__3_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__3_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__3_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -479,19 +479,19 @@ define void @v_shuffle_v3i32_v2i32__3_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__3_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__3_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -526,18 +526,18 @@ define void @v_shuffle_v3i32_v2i32__3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -571,18 +571,18 @@ define void @v_shuffle_v3i32_v2i32__u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -616,18 +616,18 @@ define void @v_shuffle_v3i32_v2i32__0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <3 x i32> zeroinitializer store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -661,19 +661,19 @@ define void @v_shuffle_v3i32_v2i32__1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -706,18 +706,18 @@ define void @v_shuffle_v3i32_v2i32__2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -757,21 +757,21 @@ define void @v_shuffle_v3i32_v2i32__3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -811,20 +811,20 @@ define void @v_shuffle_v3i32_v2i32__3_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__3_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__3_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -865,21 +865,21 @@ define void @v_shuffle_v3i32_v2i32__3_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__3_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__3_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -920,22 +920,22 @@ define void @v_shuffle_v3i32_v2i32__3_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__3_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__3_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -968,18 +968,18 @@ define void @v_shuffle_v3i32_v2i32__u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1011,18 +1011,18 @@ define void @v_shuffle_v3i32_v2i32__0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1056,18 +1056,18 @@ define void @v_shuffle_v3i32_v2i32__1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1099,18 +1099,18 @@ define void @v_shuffle_v3i32_v2i32__2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1150,21 +1150,21 @@ define void @v_shuffle_v3i32_v2i32__3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -1204,21 +1204,21 @@ define void @v_shuffle_v3i32_v2i32__3_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__3_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__3_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -1259,22 +1259,22 @@ define void @v_shuffle_v3i32_v2i32__3_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__3_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__3_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -1316,22 +1316,22 @@ define void @v_shuffle_v3i32_v2i32__3_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__3_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__3_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -1373,16 +1373,16 @@ define void @v_shuffle_v3i32_v2i32__0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1414,17 +1414,17 @@ define void @v_shuffle_v3i32_v2i32__1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1469,19 +1469,19 @@ define void @v_shuffle_v3i32_v2i32__3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -1515,18 +1515,18 @@ define void @v_shuffle_v3i32_v2i32__3_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__3_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx3 v1, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__3_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx3 v1, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -1567,22 +1567,22 @@ define void @v_shuffle_v3i32_v2i32__3_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__3_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__3_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -1621,21 +1621,21 @@ define void @v_shuffle_v3i32_v2i32__3_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__3_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__3_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -1668,18 +1668,18 @@ define void @v_shuffle_v3i32_v2i32__u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -1719,21 +1719,21 @@ define void @v_shuffle_v3i32_v2i32__0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -1774,21 +1774,21 @@ define void @v_shuffle_v3i32_v2i32__1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -1821,18 +1821,18 @@ define void @v_shuffle_v3i32_v2i32__2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -1866,18 +1866,18 @@ define void @v_shuffle_v3i32_v2i32__3_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__3_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__3_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -1919,22 +1919,22 @@ define void @v_shuffle_v3i32_v2i32__3_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__3_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__3_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -1975,21 +1975,21 @@ define void @v_shuffle_v3i32_v2i32__3_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__3_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__3_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -2024,19 +2024,19 @@ define void @v_shuffle_v3i32_v2i32__3_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v2i32__3_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v2i32__3_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -2081,17 +2081,17 @@ define void @s_shuffle_v3i32_v2i32__0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v2i32__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v2i32__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) @@ -2123,17 +2123,17 @@ define void @s_shuffle_v3i32_v2i32__1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v2i32__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v2i32__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) @@ -2179,17 +2179,17 @@ define void @s_shuffle_v3i32_v2i32__3_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v2i32__3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v2i32__3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -2230,21 +2230,21 @@ define void @s_shuffle_v3i32_v2i32__3_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v2i32__3_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v2i32__3_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -2283,20 +2283,20 @@ define void @s_shuffle_v3i32_v2i32__3_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v2i32__3_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v2i32__3_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -2331,18 +2331,18 @@ define void @s_shuffle_v3i32_v2i32__3_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v2i32__3_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v2i32__3_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -2402,21 +2402,21 @@ define void @s_shuffle_v3i32_v2i32__3_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v2i32__3_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s9 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v2i32__3_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s9 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -2457,21 +2457,21 @@ define void @s_shuffle_v3i32_v2i32__3_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v2i32__3_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s9 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v2i32__3_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s9 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -2508,19 +2508,19 @@ define void @s_shuffle_v3i32_v2i32__3_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v2i32__3_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v2i32__3_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -2575,18 +2575,18 @@ define void @s_shuffle_v3i32_v2i32__u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v2i32__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v2i32__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) @@ -2641,19 +2641,19 @@ define void @s_shuffle_v3i32_v2i32__1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v2i32__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v2i32__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) @@ -2687,18 +2687,18 @@ define void @s_shuffle_v3i32_v2i32__2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v2i32__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v2i32__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) @@ -2740,22 +2740,22 @@ define void @s_shuffle_v3i32_v2i32__3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v2i32__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v2i32__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -2796,21 +2796,21 @@ define void @s_shuffle_v3i32_v2i32__3_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v2i32__3_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v2i32__3_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -2853,22 +2853,22 @@ define void @s_shuffle_v3i32_v2i32__3_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v2i32__3_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v2i32__3_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -2911,22 +2911,22 @@ define void @s_shuffle_v3i32_v2i32__3_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v2i32__3_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v2i32__3_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -3040,21 +3040,21 @@ define void @s_shuffle_v3i32_v2i32__3_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v2i32__3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v2i32__3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -3095,21 +3095,21 @@ define void @s_shuffle_v3i32_v2i32__3_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v2i32__3_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v2i32__3_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -3152,22 +3152,22 @@ define void @s_shuffle_v3i32_v2i32__3_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v2i32__3_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v2i32__3_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -3210,22 +3210,22 @@ define void @s_shuffle_v3i32_v2i32__3_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v2i32__3_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v2i32__3_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -3270,17 +3270,17 @@ define void @s_shuffle_v3i32_v2i32__0_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v2i32__0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v2i32__0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) @@ -3312,17 +3312,17 @@ define void @s_shuffle_v3i32_v2i32__1_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v2i32__1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v2i32__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) @@ -3372,19 +3372,19 @@ define void @s_shuffle_v3i32_v2i32__3_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v2i32__3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v2i32__3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -3419,18 +3419,18 @@ define void @s_shuffle_v3i32_v2i32__3_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v2i32__3_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v2i32__3_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -3473,22 +3473,22 @@ define void @s_shuffle_v3i32_v2i32__3_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v2i32__3_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v2i32__3_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -3529,21 +3529,21 @@ define void @s_shuffle_v3i32_v2i32__3_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v2i32__3_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v2i32__3_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -3603,21 +3603,21 @@ define void @s_shuffle_v3i32_v2i32__0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v2i32__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v2i32__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -3658,21 +3658,21 @@ define void @s_shuffle_v3i32_v2i32__1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v2i32__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v2i32__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -3726,18 +3726,18 @@ define void @s_shuffle_v3i32_v2i32__3_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v2i32__3_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v2i32__3_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -3780,22 +3780,22 @@ define void @s_shuffle_v3i32_v2i32__3_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v2i32__3_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v2i32__3_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -3836,21 +3836,21 @@ define void @s_shuffle_v3i32_v2i32__3_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v2i32__3_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v2i32__3_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> @@ -3887,19 +3887,19 @@ define void @s_shuffle_v3i32_v2i32__3_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v2i32__3_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v2i32__3_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <3 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v3i32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v3i32.ll index a4d63a61a2687..7061c13b28d03 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v3i32.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v3i32.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v3i32_v3i32__u_u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v3i32_v3i32__0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -79,17 +79,17 @@ define void @v_shuffle_v3i32_v3i32__1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -121,17 +121,17 @@ define void @v_shuffle_v3i32_v3i32__2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -174,17 +174,17 @@ define void @v_shuffle_v3i32_v3i32__4_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -217,17 +217,17 @@ define void @v_shuffle_v3i32_v3i32__5_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -268,22 +268,22 @@ define void @v_shuffle_v3i32_v3i32__5_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -322,21 +322,21 @@ define void @v_shuffle_v3i32_v3i32__5_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -377,21 +377,21 @@ define void @v_shuffle_v3i32_v3i32__5_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -425,18 +425,18 @@ define void @v_shuffle_v3i32_v3i32__5_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -469,17 +469,17 @@ define void @v_shuffle_v3i32_v3i32__5_4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -514,18 +514,18 @@ define void @v_shuffle_v3i32_v3i32__5_5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -568,23 +568,23 @@ define void @v_shuffle_v3i32_v3i32__5_5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -627,23 +627,23 @@ define void @v_shuffle_v3i32_v3i32__5_5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -684,22 +684,22 @@ define void @v_shuffle_v3i32_v3i32__5_5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -735,20 +735,20 @@ define void @v_shuffle_v3i32_v3i32__5_5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -785,20 +785,20 @@ define void @v_shuffle_v3i32_v3i32__5_5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: v_mov_b32_e32 v6, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: v_mov_b32_e32 v6, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -833,18 +833,18 @@ define void @v_shuffle_v3i32_v3i32__5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -879,18 +879,18 @@ define void @v_shuffle_v3i32_v3i32__u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -924,18 +924,18 @@ define void @v_shuffle_v3i32_v3i32__0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <3 x i32> zeroinitializer store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -970,19 +970,19 @@ define void @v_shuffle_v3i32_v3i32__1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1017,19 +1017,19 @@ define void @v_shuffle_v3i32_v3i32__2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1063,18 +1063,18 @@ define void @v_shuffle_v3i32_v3i32__3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1116,23 +1116,23 @@ define void @v_shuffle_v3i32_v3i32__4_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -1175,23 +1175,23 @@ define void @v_shuffle_v3i32_v3i32__5_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -1232,22 +1232,22 @@ define void @v_shuffle_v3i32_v3i32__5_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -1290,23 +1290,23 @@ define void @v_shuffle_v3i32_v3i32__5_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -1348,22 +1348,22 @@ define void @v_shuffle_v3i32_v3i32__5_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -1405,21 +1405,21 @@ define void @v_shuffle_v3i32_v3i32__5_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -1460,22 +1460,22 @@ define void @v_shuffle_v3i32_v3i32__5_4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -1508,17 +1508,17 @@ define void @v_shuffle_v3i32_v3i32__u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1550,17 +1550,17 @@ define void @v_shuffle_v3i32_v3i32__0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1594,18 +1594,18 @@ define void @v_shuffle_v3i32_v3i32__1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1639,18 +1639,18 @@ define void @v_shuffle_v3i32_v3i32__2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1682,17 +1682,17 @@ define void @v_shuffle_v3i32_v3i32__3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1732,22 +1732,22 @@ define void @v_shuffle_v3i32_v3i32__4_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -1788,22 +1788,22 @@ define void @v_shuffle_v3i32_v3i32__5_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -1844,22 +1844,22 @@ define void @v_shuffle_v3i32_v3i32__5_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -1902,23 +1902,23 @@ define void @v_shuffle_v3i32_v3i32__5_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -1961,22 +1961,22 @@ define void @v_shuffle_v3i32_v3i32__5_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: v_mov_b32_e32 v6, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: v_mov_b32_e32 v6, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -2018,21 +2018,21 @@ define void @v_shuffle_v3i32_v3i32__5_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -2073,22 +2073,22 @@ define void @v_shuffle_v3i32_v3i32__5_4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -2121,17 +2121,17 @@ define void @v_shuffle_v3i32_v3i32__u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2163,17 +2163,17 @@ define void @v_shuffle_v3i32_v3i32__0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2207,18 +2207,18 @@ define void @v_shuffle_v3i32_v3i32__1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2252,18 +2252,18 @@ define void @v_shuffle_v3i32_v3i32__2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2295,17 +2295,17 @@ define void @v_shuffle_v3i32_v3i32__3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2345,21 +2345,21 @@ define void @v_shuffle_v3i32_v3i32__4_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -2400,21 +2400,21 @@ define void @v_shuffle_v3i32_v3i32__5_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -2453,21 +2453,21 @@ define void @v_shuffle_v3i32_v3i32__5_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -2510,22 +2510,22 @@ define void @v_shuffle_v3i32_v3i32__5_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -2564,21 +2564,21 @@ define void @v_shuffle_v3i32_v3i32__5_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -2619,22 +2619,22 @@ define void @v_shuffle_v3i32_v3i32__5_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -2675,21 +2675,21 @@ define void @v_shuffle_v3i32_v3i32__5_4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -2731,16 +2731,16 @@ define void @v_shuffle_v3i32_v3i32__0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2772,17 +2772,17 @@ define void @v_shuffle_v3i32_v3i32__1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2814,17 +2814,17 @@ define void @v_shuffle_v3i32_v3i32__2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2870,19 +2870,19 @@ define void @v_shuffle_v3i32_v3i32__4_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -2918,19 +2918,19 @@ define void @v_shuffle_v3i32_v3i32__5_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -2964,18 +2964,18 @@ define void @v_shuffle_v3i32_v3i32__5_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v3, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v3, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -3017,21 +3017,21 @@ define void @v_shuffle_v3i32_v3i32__5_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -3070,21 +3070,21 @@ define void @v_shuffle_v3i32_v3i32__5_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -3127,22 +3127,22 @@ define void @v_shuffle_v3i32_v3i32__5_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -3177,19 +3177,19 @@ define void @v_shuffle_v3i32_v3i32__5_4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -3222,17 +3222,17 @@ define void @v_shuffle_v3i32_v3i32__u_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -3272,22 +3272,22 @@ define void @v_shuffle_v3i32_v3i32__0_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -3328,22 +3328,22 @@ define void @v_shuffle_v3i32_v3i32__1_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -3384,21 +3384,21 @@ define void @v_shuffle_v3i32_v3i32__2_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -3431,17 +3431,17 @@ define void @v_shuffle_v3i32_v3i32__3_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -3476,18 +3476,18 @@ define void @v_shuffle_v3i32_v3i32__4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -3522,18 +3522,18 @@ define void @v_shuffle_v3i32_v3i32__5_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -3568,18 +3568,18 @@ define void @v_shuffle_v3i32_v3i32__5_u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -3620,21 +3620,21 @@ define void @v_shuffle_v3i32_v3i32__5_0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -3675,22 +3675,22 @@ define void @v_shuffle_v3i32_v3i32__5_1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -3733,22 +3733,22 @@ define void @v_shuffle_v3i32_v3i32__5_2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -3783,19 +3783,19 @@ define void @v_shuffle_v3i32_v3i32__5_3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -3828,17 +3828,17 @@ define void @v_shuffle_v3i32_v3i32__u_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -3879,22 +3879,22 @@ define void @v_shuffle_v3i32_v3i32__0_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -3935,22 +3935,22 @@ define void @v_shuffle_v3i32_v3i32__1_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -3991,21 +3991,21 @@ define void @v_shuffle_v3i32_v3i32__2_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, v6 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, v6 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -4038,17 +4038,17 @@ define void @v_shuffle_v3i32_v3i32__3_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -4083,18 +4083,18 @@ define void @v_shuffle_v3i32_v3i32__4_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -4127,17 +4127,17 @@ define void @v_shuffle_v3i32_v3i32__5_u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -4178,22 +4178,22 @@ define void @v_shuffle_v3i32_v3i32__5_0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -4234,22 +4234,22 @@ define void @v_shuffle_v3i32_v3i32__5_1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -4290,21 +4290,21 @@ define void @v_shuffle_v3i32_v3i32__5_2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -4341,20 +4341,20 @@ define void @v_shuffle_v3i32_v3i32__5_3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -4387,17 +4387,17 @@ define void @v_shuffle_v3i32_v3i32__5_4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v3i32__5_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -4442,17 +4442,17 @@ define void @s_shuffle_v3i32_v3i32__0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) @@ -4484,17 +4484,17 @@ define void @s_shuffle_v3i32_v3i32__1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) @@ -4526,17 +4526,17 @@ define void @s_shuffle_v3i32_v3i32__2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) @@ -4582,17 +4582,17 @@ define void @s_shuffle_v3i32_v3i32__4_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -4625,17 +4625,17 @@ define void @s_shuffle_v3i32_v3i32__5_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -4676,21 +4676,21 @@ define void @s_shuffle_v3i32_v3i32__5_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -4729,20 +4729,20 @@ define void @s_shuffle_v3i32_v3i32__5_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -4783,21 +4783,21 @@ define void @s_shuffle_v3i32_v3i32__5_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -4832,18 +4832,18 @@ define void @s_shuffle_v3i32_v3i32__5_3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -4897,18 +4897,18 @@ define void @s_shuffle_v3i32_v3i32__5_5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -4951,22 +4951,22 @@ define void @s_shuffle_v3i32_v3i32__5_5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -5009,22 +5009,22 @@ define void @s_shuffle_v3i32_v3i32__5_5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -5065,21 +5065,21 @@ define void @s_shuffle_v3i32_v3i32__5_5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -5116,19 +5116,19 @@ define void @s_shuffle_v3i32_v3i32__5_5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -5165,19 +5165,19 @@ define void @s_shuffle_v3i32_v3i32__5_5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -5232,18 +5232,18 @@ define void @s_shuffle_v3i32_v3i32__u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) @@ -5298,19 +5298,19 @@ define void @s_shuffle_v3i32_v3i32__1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) @@ -5346,19 +5346,19 @@ define void @s_shuffle_v3i32_v3i32__2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) @@ -5392,18 +5392,18 @@ define void @s_shuffle_v3i32_v3i32__3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) @@ -5445,22 +5445,22 @@ define void @s_shuffle_v3i32_v3i32__4_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -5503,22 +5503,22 @@ define void @s_shuffle_v3i32_v3i32__5_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -5559,21 +5559,21 @@ define void @s_shuffle_v3i32_v3i32__5_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -5616,22 +5616,22 @@ define void @s_shuffle_v3i32_v3i32__5_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -5674,22 +5674,22 @@ define void @s_shuffle_v3i32_v3i32__5_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -5732,22 +5732,22 @@ define void @s_shuffle_v3i32_v3i32__5_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -5788,21 +5788,21 @@ define void @s_shuffle_v3i32_v3i32__5_4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -5935,21 +5935,21 @@ define void @s_shuffle_v3i32_v3i32__4_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -5990,21 +5990,21 @@ define void @s_shuffle_v3i32_v3i32__5_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -6045,21 +6045,21 @@ define void @s_shuffle_v3i32_v3i32__5_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -6102,22 +6102,22 @@ define void @s_shuffle_v3i32_v3i32__5_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -6160,22 +6160,22 @@ define void @s_shuffle_v3i32_v3i32__5_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -6218,22 +6218,22 @@ define void @s_shuffle_v3i32_v3i32__5_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -6274,21 +6274,21 @@ define void @s_shuffle_v3i32_v3i32__5_4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -6421,21 +6421,21 @@ define void @s_shuffle_v3i32_v3i32__4_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -6476,21 +6476,21 @@ define void @s_shuffle_v3i32_v3i32__5_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -6529,20 +6529,20 @@ define void @s_shuffle_v3i32_v3i32__5_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -6585,22 +6585,22 @@ define void @s_shuffle_v3i32_v3i32__5_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -6639,20 +6639,20 @@ define void @s_shuffle_v3i32_v3i32__5_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -6693,21 +6693,21 @@ define void @s_shuffle_v3i32_v3i32__5_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -6748,21 +6748,21 @@ define void @s_shuffle_v3i32_v3i32__5_4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -6807,17 +6807,17 @@ define void @s_shuffle_v3i32_v3i32__0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) @@ -6849,17 +6849,17 @@ define void @s_shuffle_v3i32_v3i32__1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) @@ -6891,17 +6891,17 @@ define void @s_shuffle_v3i32_v3i32__2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) @@ -6951,19 +6951,19 @@ define void @s_shuffle_v3i32_v3i32__4_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -7000,19 +7000,19 @@ define void @s_shuffle_v3i32_v3i32__5_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -7047,18 +7047,18 @@ define void @s_shuffle_v3i32_v3i32__5_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -7101,22 +7101,22 @@ define void @s_shuffle_v3i32_v3i32__5_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -7157,21 +7157,21 @@ define void @s_shuffle_v3i32_v3i32__5_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -7214,22 +7214,22 @@ define void @s_shuffle_v3i32_v3i32__5_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -7266,19 +7266,19 @@ define void @s_shuffle_v3i32_v3i32__5_4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -7338,21 +7338,21 @@ define void @s_shuffle_v3i32_v3i32__0_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -7393,21 +7393,21 @@ define void @s_shuffle_v3i32_v3i32__1_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -7448,21 +7448,21 @@ define void @s_shuffle_v3i32_v3i32__2_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -7556,18 +7556,18 @@ define void @s_shuffle_v3i32_v3i32__5_u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -7610,22 +7610,22 @@ define void @s_shuffle_v3i32_v3i32__5_0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -7666,21 +7666,21 @@ define void @s_shuffle_v3i32_v3i32__5_1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -7723,22 +7723,22 @@ define void @s_shuffle_v3i32_v3i32__5_2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -7775,19 +7775,19 @@ define void @s_shuffle_v3i32_v3i32__5_3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -7847,21 +7847,21 @@ define void @s_shuffle_v3i32_v3i32__0_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -7902,21 +7902,21 @@ define void @s_shuffle_v3i32_v3i32__1_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -7957,21 +7957,21 @@ define void @s_shuffle_v3i32_v3i32__2_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -8070,21 +8070,21 @@ define void @s_shuffle_v3i32_v3i32__5_0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -8125,21 +8125,21 @@ define void @s_shuffle_v3i32_v3i32__5_1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -8180,21 +8180,21 @@ define void @s_shuffle_v3i32_v3i32__5_2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> @@ -8231,19 +8231,19 @@ define void @s_shuffle_v3i32_v3i32__5_3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v3i32__5_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v3i32__5_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <3 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v4i32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v4i32.ll index 445493f48204a..11d1897d0449f 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v4i32.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v4i32.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v3i32_v4i32__u_u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v3i32_v4i32__0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -78,17 +78,17 @@ define void @v_shuffle_v3i32_v4i32__1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -120,17 +120,17 @@ define void @v_shuffle_v3i32_v4i32__2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -162,17 +162,17 @@ define void @v_shuffle_v3i32_v4i32__3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -214,17 +214,17 @@ define void @v_shuffle_v3i32_v4i32__5_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -257,17 +257,17 @@ define void @v_shuffle_v3i32_v4i32__6_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__6_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__6_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -300,17 +300,17 @@ define void @v_shuffle_v3i32_v4i32__7_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -351,22 +351,22 @@ define void @v_shuffle_v3i32_v4i32__7_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -405,21 +405,21 @@ define void @v_shuffle_v3i32_v4i32__7_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -459,21 +459,21 @@ define void @v_shuffle_v3i32_v4i32__7_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -514,21 +514,21 @@ define void @v_shuffle_v3i32_v4i32__7_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -563,18 +563,18 @@ define void @v_shuffle_v3i32_v4i32__7_4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -607,17 +607,17 @@ define void @v_shuffle_v3i32_v4i32__7_5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -651,18 +651,18 @@ define void @v_shuffle_v3i32_v4i32__7_6_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -697,18 +697,18 @@ define void @v_shuffle_v3i32_v4i32__7_7_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -751,23 +751,23 @@ define void @v_shuffle_v3i32_v4i32__7_7_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -810,23 +810,23 @@ define void @v_shuffle_v3i32_v4i32__7_7_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -867,22 +867,22 @@ define void @v_shuffle_v3i32_v4i32__7_7_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -924,22 +924,22 @@ define void @v_shuffle_v3i32_v4i32__7_7_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -975,18 +975,18 @@ define void @v_shuffle_v3i32_v4i32__7_7_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -1021,18 +1021,18 @@ define void @v_shuffle_v3i32_v4i32__7_7_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -1067,18 +1067,18 @@ define void @v_shuffle_v3i32_v4i32__7_7_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -1114,19 +1114,19 @@ define void @v_shuffle_v3i32_v4i32__7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -1161,18 +1161,18 @@ define void @v_shuffle_v3i32_v4i32__u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1206,18 +1206,18 @@ define void @v_shuffle_v3i32_v4i32__0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> zeroinitializer store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1252,19 +1252,19 @@ define void @v_shuffle_v3i32_v4i32__1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1299,18 +1299,18 @@ define void @v_shuffle_v3i32_v4i32__2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1346,19 +1346,19 @@ define void @v_shuffle_v3i32_v4i32__3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1392,18 +1392,18 @@ define void @v_shuffle_v3i32_v4i32__4_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1444,23 +1444,23 @@ define void @v_shuffle_v3i32_v4i32__5_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -1503,23 +1503,23 @@ define void @v_shuffle_v3i32_v4i32__6_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__6_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__6_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -1562,23 +1562,23 @@ define void @v_shuffle_v3i32_v4i32__7_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -1619,22 +1619,22 @@ define void @v_shuffle_v3i32_v4i32__7_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -1677,23 +1677,23 @@ define void @v_shuffle_v3i32_v4i32__7_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -1735,22 +1735,22 @@ define void @v_shuffle_v3i32_v4i32__7_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -1792,21 +1792,21 @@ define void @v_shuffle_v3i32_v4i32__7_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -1849,22 +1849,22 @@ define void @v_shuffle_v3i32_v4i32__7_4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -1905,22 +1905,22 @@ define void @v_shuffle_v3i32_v4i32__7_5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -1962,23 +1962,23 @@ define void @v_shuffle_v3i32_v4i32__7_6_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -2011,17 +2011,17 @@ define void @v_shuffle_v3i32_v4i32__u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2053,17 +2053,17 @@ define void @v_shuffle_v3i32_v4i32__0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2097,18 +2097,18 @@ define void @v_shuffle_v3i32_v4i32__1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2142,18 +2142,18 @@ define void @v_shuffle_v3i32_v4i32__2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2187,18 +2187,18 @@ define void @v_shuffle_v3i32_v4i32__3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2230,17 +2230,17 @@ define void @v_shuffle_v3i32_v4i32__4_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2280,22 +2280,22 @@ define void @v_shuffle_v3i32_v4i32__5_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -2336,22 +2336,22 @@ define void @v_shuffle_v3i32_v4i32__6_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__6_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__6_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -2392,22 +2392,22 @@ define void @v_shuffle_v3i32_v4i32__7_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -2448,22 +2448,22 @@ define void @v_shuffle_v3i32_v4i32__7_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -2506,23 +2506,23 @@ define void @v_shuffle_v3i32_v4i32__7_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -2565,22 +2565,22 @@ define void @v_shuffle_v3i32_v4i32__7_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: v_mov_b32_e32 v6, v1 -; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: v_mov_b32_e32 v6, v1 +; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -2621,21 +2621,21 @@ define void @v_shuffle_v3i32_v4i32__7_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -2678,22 +2678,22 @@ define void @v_shuffle_v3i32_v4i32__7_4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -2734,22 +2734,22 @@ define void @v_shuffle_v3i32_v4i32__7_5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -2791,23 +2791,23 @@ define void @v_shuffle_v3i32_v4i32__7_6_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -2840,17 +2840,17 @@ define void @v_shuffle_v3i32_v4i32__u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2882,17 +2882,17 @@ define void @v_shuffle_v3i32_v4i32__0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2925,18 +2925,18 @@ define void @v_shuffle_v3i32_v4i32__1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2970,18 +2970,18 @@ define void @v_shuffle_v3i32_v4i32__2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3015,18 +3015,18 @@ define void @v_shuffle_v3i32_v4i32__3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3058,17 +3058,17 @@ define void @v_shuffle_v3i32_v4i32__4_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3108,21 +3108,21 @@ define void @v_shuffle_v3i32_v4i32__5_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -3163,21 +3163,21 @@ define void @v_shuffle_v3i32_v4i32__6_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__6_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__6_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -3218,21 +3218,21 @@ define void @v_shuffle_v3i32_v4i32__7_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -3271,21 +3271,21 @@ define void @v_shuffle_v3i32_v4i32__7_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -3328,22 +3328,22 @@ define void @v_shuffle_v3i32_v4i32__7_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -3382,21 +3382,21 @@ define void @v_shuffle_v3i32_v4i32__7_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -3437,21 +3437,21 @@ define void @v_shuffle_v3i32_v4i32__7_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -3492,22 +3492,22 @@ define void @v_shuffle_v3i32_v4i32__7_4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -3548,21 +3548,21 @@ define void @v_shuffle_v3i32_v4i32__7_5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -3603,22 +3603,22 @@ define void @v_shuffle_v3i32_v4i32__7_6_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -3652,18 +3652,18 @@ define void @v_shuffle_v3i32_v4i32__u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3697,18 +3697,18 @@ define void @v_shuffle_v3i32_v4i32__0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3742,19 +3742,19 @@ define void @v_shuffle_v3i32_v4i32__1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3789,19 +3789,19 @@ define void @v_shuffle_v3i32_v4i32__2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3836,19 +3836,19 @@ define void @v_shuffle_v3i32_v4i32__3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3881,18 +3881,18 @@ define void @v_shuffle_v3i32_v4i32__4_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3933,22 +3933,22 @@ define void @v_shuffle_v3i32_v4i32__5_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -3990,22 +3990,22 @@ define void @v_shuffle_v3i32_v4i32__6_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__6_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__6_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -4047,22 +4047,22 @@ define void @v_shuffle_v3i32_v4i32__7_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -4102,21 +4102,21 @@ define void @v_shuffle_v3i32_v4i32__7_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -4158,22 +4158,22 @@ define void @v_shuffle_v3i32_v4i32__7_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -4214,21 +4214,21 @@ define void @v_shuffle_v3i32_v4i32__7_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -4269,22 +4269,22 @@ define void @v_shuffle_v3i32_v4i32__7_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -4326,22 +4326,22 @@ define void @v_shuffle_v3i32_v4i32__7_4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -4382,21 +4382,21 @@ define void @v_shuffle_v3i32_v4i32__7_5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -4438,22 +4438,22 @@ define void @v_shuffle_v3i32_v4i32__7_6_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -4495,16 +4495,16 @@ define void @v_shuffle_v3i32_v4i32__0_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4535,17 +4535,17 @@ define void @v_shuffle_v3i32_v4i32__1_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4577,17 +4577,17 @@ define void @v_shuffle_v3i32_v4i32__2_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4619,17 +4619,17 @@ define void @v_shuffle_v3i32_v4i32__3_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4675,19 +4675,19 @@ define void @v_shuffle_v3i32_v4i32__5_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -4723,18 +4723,18 @@ define void @v_shuffle_v3i32_v4i32__6_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__6_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__6_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -4771,19 +4771,19 @@ define void @v_shuffle_v3i32_v4i32__7_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -4818,18 +4818,18 @@ define void @v_shuffle_v3i32_v4i32__7_u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -4872,23 +4872,23 @@ define void @v_shuffle_v3i32_v4i32__7_0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -4927,21 +4927,21 @@ define void @v_shuffle_v3i32_v4i32__7_1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -4982,22 +4982,22 @@ define void @v_shuffle_v3i32_v4i32__7_2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -5040,22 +5040,22 @@ define void @v_shuffle_v3i32_v4i32__7_3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -5092,19 +5092,19 @@ define void @v_shuffle_v3i32_v4i32__7_5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -5140,20 +5140,20 @@ define void @v_shuffle_v3i32_v4i32__7_6_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -5186,17 +5186,17 @@ define void @v_shuffle_v3i32_v4i32__u_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -5236,22 +5236,22 @@ define void @v_shuffle_v3i32_v4i32__0_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -5291,22 +5291,22 @@ define void @v_shuffle_v3i32_v4i32__1_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -5347,21 +5347,21 @@ define void @v_shuffle_v3i32_v4i32__2_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -5402,21 +5402,21 @@ define void @v_shuffle_v3i32_v4i32__3_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -5449,17 +5449,17 @@ define void @v_shuffle_v3i32_v4i32__4_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -5494,18 +5494,18 @@ define void @v_shuffle_v3i32_v4i32__5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -5540,18 +5540,18 @@ define void @v_shuffle_v3i32_v4i32__6_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__6_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__6_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -5586,18 +5586,18 @@ define void @v_shuffle_v3i32_v4i32__7_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -5632,18 +5632,18 @@ define void @v_shuffle_v3i32_v4i32__7_u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -5686,23 +5686,23 @@ define void @v_shuffle_v3i32_v4i32__7_0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -5743,22 +5743,22 @@ define void @v_shuffle_v3i32_v4i32__7_1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -5800,22 +5800,22 @@ define void @v_shuffle_v3i32_v4i32__7_2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -5858,22 +5858,22 @@ define void @v_shuffle_v3i32_v4i32__7_3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -5910,19 +5910,19 @@ define void @v_shuffle_v3i32_v4i32__7_4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -5958,20 +5958,20 @@ define void @v_shuffle_v3i32_v4i32__7_6_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: v_mov_b32_e32 v6, v1 -; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: v_mov_b32_e32 v6, v1 +; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -6004,17 +6004,17 @@ define void @v_shuffle_v3i32_v4i32__u_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__u_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__u_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -6055,22 +6055,22 @@ define void @v_shuffle_v3i32_v4i32__0_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__0_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__0_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -6111,22 +6111,22 @@ define void @v_shuffle_v3i32_v4i32__1_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__1_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__1_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -6167,21 +6167,21 @@ define void @v_shuffle_v3i32_v4i32__2_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__2_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, v6 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__2_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, v6 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -6222,21 +6222,21 @@ define void @v_shuffle_v3i32_v4i32__3_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__3_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, v6 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__3_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, v6 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -6269,17 +6269,17 @@ define void @v_shuffle_v3i32_v4i32__4_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__4_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__4_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -6313,18 +6313,18 @@ define void @v_shuffle_v3i32_v4i32__5_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__5_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__5_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -6359,18 +6359,18 @@ define void @v_shuffle_v3i32_v4i32__6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -6405,18 +6405,18 @@ define void @v_shuffle_v3i32_v4i32__7_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -6449,17 +6449,17 @@ define void @v_shuffle_v3i32_v4i32__7_u_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -6500,22 +6500,22 @@ define void @v_shuffle_v3i32_v4i32__7_0_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -6556,22 +6556,22 @@ define void @v_shuffle_v3i32_v4i32__7_1_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -6612,21 +6612,21 @@ define void @v_shuffle_v3i32_v4i32__7_2_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -6667,21 +6667,21 @@ define void @v_shuffle_v3i32_v4i32__7_3_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -6717,20 +6717,20 @@ define void @v_shuffle_v3i32_v4i32__7_4_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -6763,17 +6763,17 @@ define void @v_shuffle_v3i32_v4i32__7_5_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -6807,18 +6807,18 @@ define void @v_shuffle_v3i32_v4i32__u_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__u_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__u_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -6859,22 +6859,22 @@ define void @v_shuffle_v3i32_v4i32__0_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__0_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__0_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -6916,22 +6916,22 @@ define void @v_shuffle_v3i32_v4i32__1_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__1_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__1_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -6973,22 +6973,22 @@ define void @v_shuffle_v3i32_v4i32__2_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__2_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__2_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -7030,22 +7030,22 @@ define void @v_shuffle_v3i32_v4i32__3_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__3_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__3_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -7080,18 +7080,18 @@ define void @v_shuffle_v3i32_v4i32__4_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__4_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__4_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -7126,19 +7126,19 @@ define void @v_shuffle_v3i32_v4i32__5_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__5_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__5_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -7174,19 +7174,19 @@ define void @v_shuffle_v3i32_v4i32__6_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__6_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__6_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -7220,18 +7220,18 @@ define void @v_shuffle_v3i32_v4i32__7_u_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -7273,23 +7273,23 @@ define void @v_shuffle_v3i32_v4i32__7_0_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -7330,22 +7330,22 @@ define void @v_shuffle_v3i32_v4i32__7_1_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -7387,22 +7387,22 @@ define void @v_shuffle_v3i32_v4i32__7_2_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -7444,22 +7444,22 @@ define void @v_shuffle_v3i32_v4i32__7_3_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -7495,20 +7495,20 @@ define void @v_shuffle_v3i32_v4i32__7_4_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -7543,18 +7543,18 @@ define void @v_shuffle_v3i32_v4i32__7_5_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -7589,19 +7589,19 @@ define void @v_shuffle_v3i32_v4i32__7_6_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -7646,17 +7646,17 @@ define void @s_shuffle_v3i32_v4i32__0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) @@ -7688,17 +7688,17 @@ define void @s_shuffle_v3i32_v4i32__1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) @@ -7730,17 +7730,17 @@ define void @s_shuffle_v3i32_v4i32__2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) @@ -7772,17 +7772,17 @@ define void @s_shuffle_v3i32_v4i32__3_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) @@ -7828,17 +7828,17 @@ define void @s_shuffle_v3i32_v4i32__5_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -7871,17 +7871,17 @@ define void @s_shuffle_v3i32_v4i32__6_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__6_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__6_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -7914,17 +7914,17 @@ define void @s_shuffle_v3i32_v4i32__7_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -7965,21 +7965,21 @@ define void @s_shuffle_v3i32_v4i32__7_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -8018,20 +8018,20 @@ define void @s_shuffle_v3i32_v4i32__7_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -8072,21 +8072,21 @@ define void @s_shuffle_v3i32_v4i32__7_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -8127,21 +8127,21 @@ define void @s_shuffle_v3i32_v4i32__7_3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -8176,18 +8176,18 @@ define void @s_shuffle_v3i32_v4i32__7_4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -8241,18 +8241,18 @@ define void @s_shuffle_v3i32_v4i32__7_6_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -8287,18 +8287,18 @@ define void @s_shuffle_v3i32_v4i32__7_7_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -8341,22 +8341,22 @@ define void @s_shuffle_v3i32_v4i32__7_7_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -8399,22 +8399,22 @@ define void @s_shuffle_v3i32_v4i32__7_7_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -8455,21 +8455,21 @@ define void @s_shuffle_v3i32_v4i32__7_7_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -8512,22 +8512,22 @@ define void @s_shuffle_v3i32_v4i32__7_7_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -8564,19 +8564,19 @@ define void @s_shuffle_v3i32_v4i32__7_7_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -8613,19 +8613,19 @@ define void @s_shuffle_v3i32_v4i32__7_7_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -8682,19 +8682,19 @@ define void @s_shuffle_v3i32_v4i32__7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -8729,18 +8729,18 @@ define void @s_shuffle_v3i32_v4i32__u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) @@ -8795,19 +8795,19 @@ define void @s_shuffle_v3i32_v4i32__1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) @@ -8843,19 +8843,19 @@ define void @s_shuffle_v3i32_v4i32__2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) @@ -8891,19 +8891,19 @@ define void @s_shuffle_v3i32_v4i32__3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) @@ -8937,18 +8937,18 @@ define void @s_shuffle_v3i32_v4i32__4_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) @@ -8990,22 +8990,22 @@ define void @s_shuffle_v3i32_v4i32__5_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -9048,22 +9048,22 @@ define void @s_shuffle_v3i32_v4i32__6_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__6_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__6_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -9106,22 +9106,22 @@ define void @s_shuffle_v3i32_v4i32__7_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -9162,21 +9162,21 @@ define void @s_shuffle_v3i32_v4i32__7_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -9219,22 +9219,22 @@ define void @s_shuffle_v3i32_v4i32__7_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -9277,22 +9277,22 @@ define void @s_shuffle_v3i32_v4i32__7_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -9335,22 +9335,22 @@ define void @s_shuffle_v3i32_v4i32__7_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -9393,22 +9393,22 @@ define void @s_shuffle_v3i32_v4i32__7_4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -9449,21 +9449,21 @@ define void @s_shuffle_v3i32_v4i32__7_5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -9506,22 +9506,22 @@ define void @s_shuffle_v3i32_v4i32__7_6_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -9673,21 +9673,21 @@ define void @s_shuffle_v3i32_v4i32__5_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -9728,21 +9728,21 @@ define void @s_shuffle_v3i32_v4i32__6_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__6_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__6_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -9783,21 +9783,21 @@ define void @s_shuffle_v3i32_v4i32__7_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -9838,21 +9838,21 @@ define void @s_shuffle_v3i32_v4i32__7_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -9895,22 +9895,22 @@ define void @s_shuffle_v3i32_v4i32__7_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -9953,22 +9953,22 @@ define void @s_shuffle_v3i32_v4i32__7_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -10011,22 +10011,22 @@ define void @s_shuffle_v3i32_v4i32__7_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -10069,22 +10069,22 @@ define void @s_shuffle_v3i32_v4i32__7_4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -10125,21 +10125,21 @@ define void @s_shuffle_v3i32_v4i32__7_5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -10182,22 +10182,22 @@ define void @s_shuffle_v3i32_v4i32__7_6_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -10349,21 +10349,21 @@ define void @s_shuffle_v3i32_v4i32__5_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -10404,21 +10404,21 @@ define void @s_shuffle_v3i32_v4i32__6_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__6_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__6_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -10459,21 +10459,21 @@ define void @s_shuffle_v3i32_v4i32__7_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -10512,20 +10512,20 @@ define void @s_shuffle_v3i32_v4i32__7_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -10568,22 +10568,22 @@ define void @s_shuffle_v3i32_v4i32__7_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -10622,20 +10622,20 @@ define void @s_shuffle_v3i32_v4i32__7_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -10676,21 +10676,21 @@ define void @s_shuffle_v3i32_v4i32__7_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -10731,21 +10731,21 @@ define void @s_shuffle_v3i32_v4i32__7_4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -10786,21 +10786,21 @@ define void @s_shuffle_v3i32_v4i32__7_5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -10841,21 +10841,21 @@ define void @s_shuffle_v3i32_v4i32__7_6_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -10890,18 +10890,18 @@ define void @s_shuffle_v3i32_v4i32__u_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) @@ -10956,19 +10956,19 @@ define void @s_shuffle_v3i32_v4i32__1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) @@ -11004,19 +11004,19 @@ define void @s_shuffle_v3i32_v4i32__2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) @@ -11052,19 +11052,19 @@ define void @s_shuffle_v3i32_v4i32__3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) @@ -11098,18 +11098,18 @@ define void @s_shuffle_v3i32_v4i32__4_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) @@ -11151,22 +11151,22 @@ define void @s_shuffle_v3i32_v4i32__5_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -11209,22 +11209,22 @@ define void @s_shuffle_v3i32_v4i32__6_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__6_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__6_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -11267,22 +11267,22 @@ define void @s_shuffle_v3i32_v4i32__7_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -11323,21 +11323,21 @@ define void @s_shuffle_v3i32_v4i32__7_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -11380,22 +11380,22 @@ define void @s_shuffle_v3i32_v4i32__7_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -11436,21 +11436,21 @@ define void @s_shuffle_v3i32_v4i32__7_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -11493,22 +11493,22 @@ define void @s_shuffle_v3i32_v4i32__7_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -11551,22 +11551,22 @@ define void @s_shuffle_v3i32_v4i32__7_4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -11607,21 +11607,21 @@ define void @s_shuffle_v3i32_v4i32__7_5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -11664,22 +11664,22 @@ define void @s_shuffle_v3i32_v4i32__7_6_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -11724,17 +11724,17 @@ define void @s_shuffle_v3i32_v4i32__0_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) @@ -11766,17 +11766,17 @@ define void @s_shuffle_v3i32_v4i32__1_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) @@ -11808,17 +11808,17 @@ define void @s_shuffle_v3i32_v4i32__2_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) @@ -11850,17 +11850,17 @@ define void @s_shuffle_v3i32_v4i32__3_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) @@ -11910,19 +11910,19 @@ define void @s_shuffle_v3i32_v4i32__5_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -11959,19 +11959,19 @@ define void @s_shuffle_v3i32_v4i32__6_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__6_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__6_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -12008,19 +12008,19 @@ define void @s_shuffle_v3i32_v4i32__7_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -12055,18 +12055,18 @@ define void @s_shuffle_v3i32_v4i32__7_u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -12109,22 +12109,22 @@ define void @s_shuffle_v3i32_v4i32__7_0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -12165,21 +12165,21 @@ define void @s_shuffle_v3i32_v4i32__7_1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -12222,22 +12222,22 @@ define void @s_shuffle_v3i32_v4i32__7_2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -12280,22 +12280,22 @@ define void @s_shuffle_v3i32_v4i32__7_3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -12332,19 +12332,19 @@ define void @s_shuffle_v3i32_v4i32__7_5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -12381,19 +12381,19 @@ define void @s_shuffle_v3i32_v4i32__7_6_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -12453,21 +12453,21 @@ define void @s_shuffle_v3i32_v4i32__0_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -12508,21 +12508,21 @@ define void @s_shuffle_v3i32_v4i32__1_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -12563,21 +12563,21 @@ define void @s_shuffle_v3i32_v4i32__2_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -12618,21 +12618,21 @@ define void @s_shuffle_v3i32_v4i32__3_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -12746,18 +12746,18 @@ define void @s_shuffle_v3i32_v4i32__7_u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -12800,22 +12800,22 @@ define void @s_shuffle_v3i32_v4i32__7_0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -12856,21 +12856,21 @@ define void @s_shuffle_v3i32_v4i32__7_1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -12913,22 +12913,22 @@ define void @s_shuffle_v3i32_v4i32__7_2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -12971,22 +12971,22 @@ define void @s_shuffle_v3i32_v4i32__7_3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -13023,19 +13023,19 @@ define void @s_shuffle_v3i32_v4i32__7_4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -13072,19 +13072,19 @@ define void @s_shuffle_v3i32_v4i32__7_6_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -13144,21 +13144,21 @@ define void @s_shuffle_v3i32_v4i32__0_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__0_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__0_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -13199,21 +13199,21 @@ define void @s_shuffle_v3i32_v4i32__1_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__1_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__1_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -13254,21 +13254,21 @@ define void @s_shuffle_v3i32_v4i32__2_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__2_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__2_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -13309,21 +13309,21 @@ define void @s_shuffle_v3i32_v4i32__3_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__3_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__3_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -13462,21 +13462,21 @@ define void @s_shuffle_v3i32_v4i32__7_0_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -13517,21 +13517,21 @@ define void @s_shuffle_v3i32_v4i32__7_1_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -13572,21 +13572,21 @@ define void @s_shuffle_v3i32_v4i32__7_2_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -13627,21 +13627,21 @@ define void @s_shuffle_v3i32_v4i32__7_3_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -13678,19 +13678,19 @@ define void @s_shuffle_v3i32_v4i32__7_4_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -13744,18 +13744,18 @@ define void @s_shuffle_v3i32_v4i32__u_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__u_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__u_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -13796,21 +13796,21 @@ define void @s_shuffle_v3i32_v4i32__0_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__0_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__0_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -13853,22 +13853,22 @@ define void @s_shuffle_v3i32_v4i32__1_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__1_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__1_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -13911,22 +13911,22 @@ define void @s_shuffle_v3i32_v4i32__2_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__2_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__2_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -13969,22 +13969,22 @@ define void @s_shuffle_v3i32_v4i32__3_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__3_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__3_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -14041,19 +14041,19 @@ define void @s_shuffle_v3i32_v4i32__5_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__5_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__5_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -14090,19 +14090,19 @@ define void @s_shuffle_v3i32_v4i32__6_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__6_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__6_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -14137,18 +14137,18 @@ define void @s_shuffle_v3i32_v4i32__7_u_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -14191,22 +14191,22 @@ define void @s_shuffle_v3i32_v4i32__7_0_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -14247,21 +14247,21 @@ define void @s_shuffle_v3i32_v4i32__7_1_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -14304,22 +14304,22 @@ define void @s_shuffle_v3i32_v4i32__7_2_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -14362,22 +14362,22 @@ define void @s_shuffle_v3i32_v4i32__7_3_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -14414,19 +14414,19 @@ define void @s_shuffle_v3i32_v4i32__7_4_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> @@ -14483,19 +14483,19 @@ define void @s_shuffle_v3i32_v4i32__7_6_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i32_v4i32__7_6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v2i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v2i64.ll index f9c807f358da9..a15fc3212f474 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v2i64.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v2i64.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v3i64_v2i64__u_u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v3i64_v2i64__0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -81,18 +81,18 @@ define void @v_shuffle_v3i64_v2i64__1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -137,18 +137,18 @@ define void @v_shuffle_v3i64_v2i64__3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -193,24 +193,24 @@ define void @v_shuffle_v3i64_v2i64__3_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__3_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -251,22 +251,22 @@ define void @v_shuffle_v3i64_v2i64__3_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__3_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -301,19 +301,19 @@ define void @v_shuffle_v3i64_v2i64__3_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__3_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -348,18 +348,18 @@ define void @v_shuffle_v3i64_v2i64__3_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__3_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -402,22 +402,22 @@ define void @v_shuffle_v3i64_v2i64__3_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__3_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -460,22 +460,22 @@ define void @v_shuffle_v3i64_v2i64__3_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__3_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -512,19 +512,19 @@ define void @v_shuffle_v3i64_v2i64__3_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__3_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -561,19 +561,19 @@ define void @v_shuffle_v3i64_v2i64__3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -610,19 +610,19 @@ define void @v_shuffle_v3i64_v2i64__u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -658,19 +658,19 @@ define void @v_shuffle_v3i64_v2i64__0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <3 x i32> zeroinitializer store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -706,19 +706,19 @@ define void @v_shuffle_v3i64_v2i64__1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -754,19 +754,19 @@ define void @v_shuffle_v3i64_v2i64__2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -812,24 +812,24 @@ define void @v_shuffle_v3i64_v2i64__3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -872,23 +872,23 @@ define void @v_shuffle_v3i64_v2i64__3_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__3_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -931,22 +931,22 @@ define void @v_shuffle_v3i64_v2i64__3_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__3_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -989,23 +989,23 @@ define void @v_shuffle_v3i64_v2i64__3_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__3_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -1038,17 +1038,17 @@ define void @v_shuffle_v3i64_v2i64__u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1080,17 +1080,17 @@ define void @v_shuffle_v3i64_v2i64__0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1126,19 +1126,19 @@ define void @v_shuffle_v3i64_v2i64__1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1170,17 +1170,17 @@ define void @v_shuffle_v3i64_v2i64__2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1222,22 +1222,22 @@ define void @v_shuffle_v3i64_v2i64__3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -1280,22 +1280,22 @@ define void @v_shuffle_v3i64_v2i64__3_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__3_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -1342,24 +1342,24 @@ define void @v_shuffle_v3i64_v2i64__3_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__3_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -1406,24 +1406,24 @@ define void @v_shuffle_v3i64_v2i64__3_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__3_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -1465,16 +1465,16 @@ define void @v_shuffle_v3i64_v2i64__0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1508,18 +1508,18 @@ define void @v_shuffle_v3i64_v2i64__1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1566,19 +1566,19 @@ define void @v_shuffle_v3i64_v2i64__3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -1615,19 +1615,19 @@ define void @v_shuffle_v3i64_v2i64__3_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__3_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -1674,24 +1674,24 @@ define void @v_shuffle_v3i64_v2i64__3_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__3_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -1734,22 +1734,22 @@ define void @v_shuffle_v3i64_v2i64__3_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__3_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -1782,17 +1782,17 @@ define void @v_shuffle_v3i64_v2i64__u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -1835,22 +1835,22 @@ define void @v_shuffle_v3i64_v2i64__0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -1893,22 +1893,22 @@ define void @v_shuffle_v3i64_v2i64__1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -1941,17 +1941,17 @@ define void @v_shuffle_v3i64_v2i64__2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -1988,19 +1988,19 @@ define void @v_shuffle_v3i64_v2i64__3_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__3_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -2047,24 +2047,24 @@ define void @v_shuffle_v3i64_v2i64__3_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__3_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -2107,22 +2107,22 @@ define void @v_shuffle_v3i64_v2i64__3_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__3_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -2159,19 +2159,19 @@ define void @v_shuffle_v3i64_v2i64__3_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v2i64__3_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -2216,17 +2216,17 @@ define void @s_shuffle_v3i64_v2i64__0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v2i64__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v2i64__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -2260,18 +2260,18 @@ define void @s_shuffle_v3i64_v2i64__1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v2i64__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v2i64__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -2319,18 +2319,18 @@ define void @s_shuffle_v3i64_v2i64__3_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v2i64__3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -2375,23 +2375,23 @@ define void @s_shuffle_v3i64_v2i64__3_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v2i64__3_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -2432,21 +2432,21 @@ define void @s_shuffle_v3i64_v2i64__3_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v2i64__3_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -2485,20 +2485,20 @@ define void @s_shuffle_v3i64_v2i64__3_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v2i64__3_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -2563,23 +2563,23 @@ define void @s_shuffle_v3i64_v2i64__3_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v2i64__3_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -2624,23 +2624,23 @@ define void @s_shuffle_v3i64_v2i64__3_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v2i64__3_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -2683,22 +2683,22 @@ define void @s_shuffle_v3i64_v2i64__3_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v2i64__3_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -2759,20 +2759,20 @@ define void @s_shuffle_v3i64_v2i64__u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v2i64__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v2i64__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -2835,22 +2835,22 @@ define void @s_shuffle_v3i64_v2i64__1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v2i64__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v2i64__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -2888,20 +2888,20 @@ define void @s_shuffle_v3i64_v2i64__2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v2i64__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v2i64__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -2949,25 +2949,25 @@ define void @s_shuffle_v3i64_v2i64__3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v2i64__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -3012,23 +3012,23 @@ define void @s_shuffle_v3i64_v2i64__3_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v2i64__3_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -3077,25 +3077,25 @@ define void @s_shuffle_v3i64_v2i64__3_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v2i64__3_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -3144,25 +3144,25 @@ define void @s_shuffle_v3i64_v2i64__3_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v2i64__3_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -3285,23 +3285,23 @@ define void @s_shuffle_v3i64_v2i64__3_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v2i64__3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -3346,23 +3346,23 @@ define void @s_shuffle_v3i64_v2i64__3_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v2i64__3_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -3411,25 +3411,25 @@ define void @s_shuffle_v3i64_v2i64__3_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v2i64__3_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -3478,25 +3478,25 @@ define void @s_shuffle_v3i64_v2i64__3_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v2i64__3_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -3541,17 +3541,17 @@ define void @s_shuffle_v3i64_v2i64__0_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v2i64__0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v2i64__0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -3585,18 +3585,18 @@ define void @s_shuffle_v3i64_v2i64__1_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v2i64__1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v2i64__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -3652,22 +3652,22 @@ define void @s_shuffle_v3i64_v2i64__3_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v2i64__3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -3706,20 +3706,20 @@ define void @s_shuffle_v3i64_v2i64__3_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v2i64__3_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -3764,25 +3764,25 @@ define void @s_shuffle_v3i64_v2i64__3_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v2i64__3_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -3827,23 +3827,23 @@ define void @s_shuffle_v3i64_v2i64__3_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v2i64__3_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -3908,23 +3908,23 @@ define void @s_shuffle_v3i64_v2i64__0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v2i64__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v2i64__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -3969,23 +3969,23 @@ define void @s_shuffle_v3i64_v2i64__1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v2i64__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v2i64__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -4044,20 +4044,20 @@ define void @s_shuffle_v3i64_v2i64__3_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v2i64__3_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -4106,25 +4106,25 @@ define void @s_shuffle_v3i64_v2i64__3_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v2i64__3_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -4169,23 +4169,23 @@ define void @s_shuffle_v3i64_v2i64__3_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v2i64__3_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -4228,22 +4228,22 @@ define void @s_shuffle_v3i64_v2i64__3_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v2i64__3_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v3i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v3i64.ll index bd55d30d04707..f15dd7d2772e5 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v3i64.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v3i64.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v3i64_v3i64__u_u_u(ptr addrspace(1) inreg %ptr) { @@ -40,17 +40,17 @@ define void @v_shuffle_v3i64_v3i64__0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -80,16 +80,16 @@ define void @v_shuffle_v3i64_v3i64__1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -123,18 +123,18 @@ define void @v_shuffle_v3i64_v3i64__2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -175,16 +175,16 @@ define void @v_shuffle_v3i64_v3i64__4_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -219,18 +219,18 @@ define void @v_shuffle_v3i64_v3i64__5_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -275,24 +275,24 @@ define void @v_shuffle_v3i64_v3i64__5_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -333,22 +333,22 @@ define void @v_shuffle_v3i64_v3i64__5_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -389,22 +389,22 @@ define void @v_shuffle_v3i64_v3i64__5_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -443,20 +443,20 @@ define void @v_shuffle_v3i64_v3i64__5_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -491,18 +491,18 @@ define void @v_shuffle_v3i64_v3i64__5_4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -537,18 +537,18 @@ define void @v_shuffle_v3i64_v3i64__5_5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -591,22 +591,22 @@ define void @v_shuffle_v3i64_v3i64__5_5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -649,22 +649,22 @@ define void @v_shuffle_v3i64_v3i64__5_5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -707,22 +707,22 @@ define void @v_shuffle_v3i64_v3i64__5_5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -759,19 +759,19 @@ define void @v_shuffle_v3i64_v3i64__5_5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -808,19 +808,19 @@ define void @v_shuffle_v3i64_v3i64__5_5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -857,19 +857,19 @@ define void @v_shuffle_v3i64_v3i64__5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -906,19 +906,19 @@ define void @v_shuffle_v3i64_v3i64__u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -954,19 +954,19 @@ define void @v_shuffle_v3i64_v3i64__0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> zeroinitializer store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1002,19 +1002,19 @@ define void @v_shuffle_v3i64_v3i64__1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1054,21 +1054,21 @@ define void @v_shuffle_v3i64_v3i64__2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1104,19 +1104,19 @@ define void @v_shuffle_v3i64_v3i64__3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1158,22 +1158,22 @@ define void @v_shuffle_v3i64_v3i64__4_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -1220,24 +1220,24 @@ define void @v_shuffle_v3i64_v3i64__5_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -1280,23 +1280,23 @@ define void @v_shuffle_v3i64_v3i64__5_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -1339,23 +1339,23 @@ define void @v_shuffle_v3i64_v3i64__5_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -1398,22 +1398,22 @@ define void @v_shuffle_v3i64_v3i64__5_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -1456,23 +1456,23 @@ define void @v_shuffle_v3i64_v3i64__5_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -1515,22 +1515,22 @@ define void @v_shuffle_v3i64_v3i64__5_4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -1563,17 +1563,17 @@ define void @v_shuffle_v3i64_v3i64__u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1605,17 +1605,17 @@ define void @v_shuffle_v3i64_v3i64__0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1651,19 +1651,19 @@ define void @v_shuffle_v3i64_v3i64__1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1699,19 +1699,19 @@ define void @v_shuffle_v3i64_v3i64__2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1743,17 +1743,17 @@ define void @v_shuffle_v3i64_v3i64__3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1795,22 +1795,22 @@ define void @v_shuffle_v3i64_v3i64__4_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v8, v2 -; GFX940-NEXT: v_mov_b32_e32 v9, v3 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v8, v2 +; GFX942-NEXT: v_mov_b32_e32 v9, v3 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -1853,22 +1853,22 @@ define void @v_shuffle_v3i64_v3i64__5_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -1911,22 +1911,22 @@ define void @v_shuffle_v3i64_v3i64__5_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -1973,24 +1973,24 @@ define void @v_shuffle_v3i64_v3i64__5_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -2033,22 +2033,22 @@ define void @v_shuffle_v3i64_v3i64__5_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -2095,25 +2095,25 @@ define void @v_shuffle_v3i64_v3i64__5_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -2156,22 +2156,22 @@ define void @v_shuffle_v3i64_v3i64__5_4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v8 -; GFX940-NEXT: v_mov_b32_e32 v5, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v8 +; GFX942-NEXT: v_mov_b32_e32 v5, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -2204,17 +2204,17 @@ define void @v_shuffle_v3i64_v3i64__u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2250,19 +2250,19 @@ define void @v_shuffle_v3i64_v3i64__0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2294,17 +2294,17 @@ define void @v_shuffle_v3i64_v3i64__1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2340,19 +2340,19 @@ define void @v_shuffle_v3i64_v3i64__2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2384,17 +2384,17 @@ define void @v_shuffle_v3i64_v3i64__3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2436,22 +2436,22 @@ define void @v_shuffle_v3i64_v3i64__4_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v10, v4 -; GFX940-NEXT: v_mov_b32_e32 v11, v5 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v10, v4 +; GFX942-NEXT: v_mov_b32_e32 v11, v5 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -2494,22 +2494,22 @@ define void @v_shuffle_v3i64_v3i64__5_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -2552,22 +2552,22 @@ define void @v_shuffle_v3i64_v3i64__5_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -2614,24 +2614,24 @@ define void @v_shuffle_v3i64_v3i64__5_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -2674,22 +2674,22 @@ define void @v_shuffle_v3i64_v3i64__5_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -2736,24 +2736,24 @@ define void @v_shuffle_v3i64_v3i64__5_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -2796,22 +2796,22 @@ define void @v_shuffle_v3i64_v3i64__5_4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v10 -; GFX940-NEXT: v_mov_b32_e32 v7, v11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v10 +; GFX942-NEXT: v_mov_b32_e32 v7, v11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -2855,17 +2855,17 @@ define void @v_shuffle_v3i64_v3i64__0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2895,16 +2895,16 @@ define void @v_shuffle_v3i64_v3i64__1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2938,18 +2938,18 @@ define void @v_shuffle_v3i64_v3i64__2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2996,19 +2996,19 @@ define void @v_shuffle_v3i64_v3i64__4_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -3049,21 +3049,21 @@ define void @v_shuffle_v3i64_v3i64__5_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -3100,19 +3100,19 @@ define void @v_shuffle_v3i64_v3i64__5_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -3159,24 +3159,24 @@ define void @v_shuffle_v3i64_v3i64__5_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -3219,22 +3219,22 @@ define void @v_shuffle_v3i64_v3i64__5_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -3277,22 +3277,22 @@ define void @v_shuffle_v3i64_v3i64__5_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -3329,19 +3329,19 @@ define void @v_shuffle_v3i64_v3i64__5_4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -3374,17 +3374,17 @@ define void @v_shuffle_v3i64_v3i64__u_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -3427,22 +3427,22 @@ define void @v_shuffle_v3i64_v3i64__0_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -3485,22 +3485,22 @@ define void @v_shuffle_v3i64_v3i64__1_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -3543,22 +3543,22 @@ define void @v_shuffle_v3i64_v3i64__2_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -3591,17 +3591,17 @@ define void @v_shuffle_v3i64_v3i64__3_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -3638,19 +3638,19 @@ define void @v_shuffle_v3i64_v3i64__4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -3687,19 +3687,19 @@ define void @v_shuffle_v3i64_v3i64__5_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -3736,19 +3736,19 @@ define void @v_shuffle_v3i64_v3i64__5_u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -3795,24 +3795,24 @@ define void @v_shuffle_v3i64_v3i64__5_0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -3855,22 +3855,22 @@ define void @v_shuffle_v3i64_v3i64__5_1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -3913,22 +3913,22 @@ define void @v_shuffle_v3i64_v3i64__5_2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -3969,21 +3969,21 @@ define void @v_shuffle_v3i64_v3i64__5_3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -4016,17 +4016,17 @@ define void @v_shuffle_v3i64_v3i64__u_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -4069,22 +4069,22 @@ define void @v_shuffle_v3i64_v3i64__0_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -4127,22 +4127,22 @@ define void @v_shuffle_v3i64_v3i64__1_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v8 -; GFX940-NEXT: v_mov_b32_e32 v5, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v8 +; GFX942-NEXT: v_mov_b32_e32 v5, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -4185,22 +4185,22 @@ define void @v_shuffle_v3i64_v3i64__2_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v8, v4 -; GFX940-NEXT: v_mov_b32_e32 v9, v5 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v8, v4 +; GFX942-NEXT: v_mov_b32_e32 v9, v5 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -4237,19 +4237,19 @@ define void @v_shuffle_v3i64_v3i64__3_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -4282,17 +4282,17 @@ define void @v_shuffle_v3i64_v3i64__4_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -4329,19 +4329,19 @@ define void @v_shuffle_v3i64_v3i64__5_u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -4388,24 +4388,24 @@ define void @v_shuffle_v3i64_v3i64__5_0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -4448,22 +4448,22 @@ define void @v_shuffle_v3i64_v3i64__5_1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -4506,22 +4506,22 @@ define void @v_shuffle_v3i64_v3i64__5_2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -4562,21 +4562,21 @@ define void @v_shuffle_v3i64_v3i64__5_3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -4613,19 +4613,19 @@ define void @v_shuffle_v3i64_v3i64__5_4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -4670,17 +4670,17 @@ define void @s_shuffle_v3i64_v3i64__0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -4714,18 +4714,18 @@ define void @s_shuffle_v3i64_v3i64__1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -4755,18 +4755,18 @@ define void @s_shuffle_v3i64_v3i64__2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -4814,18 +4814,18 @@ define void @s_shuffle_v3i64_v3i64__4_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -4856,18 +4856,18 @@ define void @s_shuffle_v3i64_v3i64__5_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -4912,21 +4912,21 @@ define void @s_shuffle_v3i64_v3i64__5_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -4963,21 +4963,21 @@ define void @s_shuffle_v3i64_v3i64__5_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -5018,23 +5018,23 @@ define void @s_shuffle_v3i64_v3i64__5_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -5069,20 +5069,20 @@ define void @s_shuffle_v3i64_v3i64__5_3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -5141,20 +5141,20 @@ define void @s_shuffle_v3i64_v3i64__5_5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -5203,25 +5203,25 @@ define void @s_shuffle_v3i64_v3i64__5_5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -5270,25 +5270,25 @@ define void @s_shuffle_v3i64_v3i64__5_5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -5333,23 +5333,23 @@ define void @s_shuffle_v3i64_v3i64__5_5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -5388,22 +5388,22 @@ define void @s_shuffle_v3i64_v3i64__5_5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -5446,22 +5446,22 @@ define void @s_shuffle_v3i64_v3i64__5_5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -5522,20 +5522,20 @@ define void @s_shuffle_v3i64_v3i64__u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -5598,22 +5598,22 @@ define void @s_shuffle_v3i64_v3i64__1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -5651,22 +5651,22 @@ define void @s_shuffle_v3i64_v3i64__2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -5704,20 +5704,20 @@ define void @s_shuffle_v3i64_v3i64__3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -5765,25 +5765,25 @@ define void @s_shuffle_v3i64_v3i64__4_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -5832,23 +5832,23 @@ define void @s_shuffle_v3i64_v3i64__5_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -5893,21 +5893,21 @@ define void @s_shuffle_v3i64_v3i64__5_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -5956,23 +5956,23 @@ define void @s_shuffle_v3i64_v3i64__5_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -6013,25 +6013,25 @@ define void @s_shuffle_v3i64_v3i64__5_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -6080,23 +6080,23 @@ define void @s_shuffle_v3i64_v3i64__5_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -6141,23 +6141,23 @@ define void @s_shuffle_v3i64_v3i64__5_4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -6301,23 +6301,23 @@ define void @s_shuffle_v3i64_v3i64__4_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -6358,23 +6358,23 @@ define void @s_shuffle_v3i64_v3i64__5_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -6419,21 +6419,21 @@ define void @s_shuffle_v3i64_v3i64__5_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -6482,23 +6482,23 @@ define void @s_shuffle_v3i64_v3i64__5_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -6543,25 +6543,25 @@ define void @s_shuffle_v3i64_v3i64__5_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -6610,23 +6610,23 @@ define void @s_shuffle_v3i64_v3i64__5_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -6671,23 +6671,23 @@ define void @s_shuffle_v3i64_v3i64__5_4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -6831,23 +6831,23 @@ define void @s_shuffle_v3i64_v3i64__4_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -6888,23 +6888,23 @@ define void @s_shuffle_v3i64_v3i64__5_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -6941,21 +6941,21 @@ define void @s_shuffle_v3i64_v3i64__5_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -7000,25 +7000,25 @@ define void @s_shuffle_v3i64_v3i64__5_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -7055,21 +7055,21 @@ define void @s_shuffle_v3i64_v3i64__5_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -7110,23 +7110,23 @@ define void @s_shuffle_v3i64_v3i64__5_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -7171,23 +7171,23 @@ define void @s_shuffle_v3i64_v3i64__5_4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -7232,17 +7232,17 @@ define void @s_shuffle_v3i64_v3i64__0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -7276,18 +7276,18 @@ define void @s_shuffle_v3i64_v3i64__1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -7317,18 +7317,18 @@ define void @s_shuffle_v3i64_v3i64__2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -7384,22 +7384,22 @@ define void @s_shuffle_v3i64_v3i64__4_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -7438,22 +7438,22 @@ define void @s_shuffle_v3i64_v3i64__5_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -7488,20 +7488,20 @@ define void @s_shuffle_v3i64_v3i64__5_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -7546,23 +7546,23 @@ define void @s_shuffle_v3i64_v3i64__5_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -7603,23 +7603,23 @@ define void @s_shuffle_v3i64_v3i64__5_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -7664,23 +7664,23 @@ define void @s_shuffle_v3i64_v3i64__5_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s16 -; GFX940-NEXT: s_mov_b32 s9, s17 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s16 +; GFX942-NEXT: s_mov_b32 s9, s17 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -7719,22 +7719,22 @@ define void @s_shuffle_v3i64_v3i64__5_4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -7799,23 +7799,23 @@ define void @s_shuffle_v3i64_v3i64__0_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -7860,23 +7860,23 @@ define void @s_shuffle_v3i64_v3i64__1_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -7921,23 +7921,23 @@ define void @s_shuffle_v3i64_v3i64__2_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -8036,20 +8036,20 @@ define void @s_shuffle_v3i64_v3i64__5_u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -8098,23 +8098,23 @@ define void @s_shuffle_v3i64_v3i64__5_0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -8155,23 +8155,23 @@ define void @s_shuffle_v3i64_v3i64__5_1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -8216,25 +8216,25 @@ define void @s_shuffle_v3i64_v3i64__5_2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s16 -; GFX940-NEXT: s_mov_b32 s9, s17 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s16 +; GFX942-NEXT: s_mov_b32 s9, s17 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -8273,22 +8273,22 @@ define void @s_shuffle_v3i64_v3i64__5_3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -8353,23 +8353,23 @@ define void @s_shuffle_v3i64_v3i64__0_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -8414,23 +8414,23 @@ define void @s_shuffle_v3i64_v3i64__1_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -8475,23 +8475,23 @@ define void @s_shuffle_v3i64_v3i64__2_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -8598,23 +8598,23 @@ define void @s_shuffle_v3i64_v3i64__5_0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -8659,23 +8659,23 @@ define void @s_shuffle_v3i64_v3i64__5_1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -8720,23 +8720,23 @@ define void @s_shuffle_v3i64_v3i64__5_2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -8779,22 +8779,22 @@ define void @s_shuffle_v3i64_v3i64__5_3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v4i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v4i64.ll index 976c7b4fa704e..6e156d2d4a2f5 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v4i64.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v4i64.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v3i64_v4i64__u_u_u(ptr addrspace(1) inreg %ptr) { @@ -40,17 +40,17 @@ define void @v_shuffle_v3i64_v4i64__0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -80,16 +80,16 @@ define void @v_shuffle_v3i64_v4i64__1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -119,16 +119,16 @@ define void @v_shuffle_v3i64_v4i64__2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -162,18 +162,18 @@ define void @v_shuffle_v3i64_v4i64__3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -214,16 +214,16 @@ define void @v_shuffle_v3i64_v4i64__5_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -254,16 +254,16 @@ define void @v_shuffle_v3i64_v4i64__6_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__6_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__6_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -298,18 +298,18 @@ define void @v_shuffle_v3i64_v4i64__7_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -354,24 +354,24 @@ define void @v_shuffle_v3i64_v4i64__7_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -412,22 +412,22 @@ define void @v_shuffle_v3i64_v4i64__7_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -468,22 +468,22 @@ define void @v_shuffle_v3i64_v4i64__7_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -524,22 +524,22 @@ define void @v_shuffle_v3i64_v4i64__7_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -578,20 +578,20 @@ define void @v_shuffle_v3i64_v4i64__7_4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -626,18 +626,18 @@ define void @v_shuffle_v3i64_v4i64__7_5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -672,18 +672,18 @@ define void @v_shuffle_v3i64_v4i64__7_6_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -718,18 +718,18 @@ define void @v_shuffle_v3i64_v4i64__7_7_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -772,22 +772,22 @@ define void @v_shuffle_v3i64_v4i64__7_7_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -830,22 +830,22 @@ define void @v_shuffle_v3i64_v4i64__7_7_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -888,22 +888,22 @@ define void @v_shuffle_v3i64_v4i64__7_7_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -946,22 +946,22 @@ define void @v_shuffle_v3i64_v4i64__7_7_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -998,19 +998,19 @@ define void @v_shuffle_v3i64_v4i64__7_7_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -1047,19 +1047,19 @@ define void @v_shuffle_v3i64_v4i64__7_7_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -1096,19 +1096,19 @@ define void @v_shuffle_v3i64_v4i64__7_7_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -1145,19 +1145,19 @@ define void @v_shuffle_v3i64_v4i64__7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -1194,19 +1194,19 @@ define void @v_shuffle_v3i64_v4i64__u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1242,19 +1242,19 @@ define void @v_shuffle_v3i64_v4i64__0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> zeroinitializer store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1290,19 +1290,19 @@ define void @v_shuffle_v3i64_v4i64__1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1338,19 +1338,19 @@ define void @v_shuffle_v3i64_v4i64__2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1390,21 +1390,21 @@ define void @v_shuffle_v3i64_v4i64__3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1440,19 +1440,19 @@ define void @v_shuffle_v3i64_v4i64__4_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1494,22 +1494,22 @@ define void @v_shuffle_v3i64_v4i64__5_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -1552,22 +1552,22 @@ define void @v_shuffle_v3i64_v4i64__6_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__6_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v8, v0 -; GFX940-NEXT: v_mov_b32_e32 v9, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__6_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v8, v0 +; GFX942-NEXT: v_mov_b32_e32 v9, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -1614,24 +1614,24 @@ define void @v_shuffle_v3i64_v4i64__7_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -1674,23 +1674,23 @@ define void @v_shuffle_v3i64_v4i64__7_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -1733,23 +1733,23 @@ define void @v_shuffle_v3i64_v4i64__7_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -1792,22 +1792,22 @@ define void @v_shuffle_v3i64_v4i64__7_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -1850,22 +1850,22 @@ define void @v_shuffle_v3i64_v4i64__7_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -1908,23 +1908,23 @@ define void @v_shuffle_v3i64_v4i64__7_4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -1967,22 +1967,22 @@ define void @v_shuffle_v3i64_v4i64__7_5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -2025,22 +2025,22 @@ define void @v_shuffle_v3i64_v4i64__7_6_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v8 -; GFX940-NEXT: v_mov_b32_e32 v5, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v8 +; GFX942-NEXT: v_mov_b32_e32 v5, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -2073,17 +2073,17 @@ define void @v_shuffle_v3i64_v4i64__u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2115,17 +2115,17 @@ define void @v_shuffle_v3i64_v4i64__0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2161,19 +2161,19 @@ define void @v_shuffle_v3i64_v4i64__1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2209,19 +2209,19 @@ define void @v_shuffle_v3i64_v4i64__2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2257,19 +2257,19 @@ define void @v_shuffle_v3i64_v4i64__3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2301,17 +2301,17 @@ define void @v_shuffle_v3i64_v4i64__4_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2353,22 +2353,22 @@ define void @v_shuffle_v3i64_v4i64__5_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v8, v2 -; GFX940-NEXT: v_mov_b32_e32 v9, v3 -; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v8, v2 +; GFX942-NEXT: v_mov_b32_e32 v9, v3 +; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -2411,22 +2411,22 @@ define void @v_shuffle_v3i64_v4i64__6_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__6_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v10, v2 -; GFX940-NEXT: v_mov_b32_e32 v11, v3 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__6_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v10, v2 +; GFX942-NEXT: v_mov_b32_e32 v11, v3 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -2469,22 +2469,22 @@ define void @v_shuffle_v3i64_v4i64__7_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -2527,22 +2527,22 @@ define void @v_shuffle_v3i64_v4i64__7_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -2589,24 +2589,24 @@ define void @v_shuffle_v3i64_v4i64__7_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -2649,23 +2649,23 @@ define void @v_shuffle_v3i64_v4i64__7_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v14, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v14, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -2708,22 +2708,22 @@ define void @v_shuffle_v3i64_v4i64__7_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -2770,25 +2770,25 @@ define void @v_shuffle_v3i64_v4i64__7_4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -2831,22 +2831,22 @@ define void @v_shuffle_v3i64_v4i64__7_5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v10 -; GFX940-NEXT: v_mov_b32_e32 v5, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v10 +; GFX942-NEXT: v_mov_b32_e32 v5, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -2889,22 +2889,22 @@ define void @v_shuffle_v3i64_v4i64__7_6_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v6, v10 -; GFX940-NEXT: v_mov_b32_e32 v7, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v6, v10 +; GFX942-NEXT: v_mov_b32_e32 v7, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -2937,17 +2937,17 @@ define void @v_shuffle_v3i64_v4i64__u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2983,19 +2983,19 @@ define void @v_shuffle_v3i64_v4i64__0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3027,17 +3027,17 @@ define void @v_shuffle_v3i64_v4i64__1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3073,19 +3073,19 @@ define void @v_shuffle_v3i64_v4i64__2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3121,19 +3121,19 @@ define void @v_shuffle_v3i64_v4i64__3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3165,17 +3165,17 @@ define void @v_shuffle_v3i64_v4i64__4_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3217,22 +3217,22 @@ define void @v_shuffle_v3i64_v4i64__5_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v10, v4 -; GFX940-NEXT: v_mov_b32_e32 v11, v5 -; GFX940-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v10, v4 +; GFX942-NEXT: v_mov_b32_e32 v11, v5 +; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -3275,22 +3275,22 @@ define void @v_shuffle_v3i64_v4i64__6_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__6_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v12, v4 -; GFX940-NEXT: v_mov_b32_e32 v13, v5 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__6_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v12, v4 +; GFX942-NEXT: v_mov_b32_e32 v13, v5 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -3333,22 +3333,22 @@ define void @v_shuffle_v3i64_v4i64__7_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -3391,22 +3391,22 @@ define void @v_shuffle_v3i64_v4i64__7_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v12 -; GFX940-NEXT: v_mov_b32_e32 v1, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v12 +; GFX942-NEXT: v_mov_b32_e32 v1, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -3453,24 +3453,24 @@ define void @v_shuffle_v3i64_v4i64__7_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -3513,22 +3513,22 @@ define void @v_shuffle_v3i64_v4i64__7_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v12 -; GFX940-NEXT: v_mov_b32_e32 v1, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v12 +; GFX942-NEXT: v_mov_b32_e32 v1, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -3571,22 +3571,22 @@ define void @v_shuffle_v3i64_v4i64__7_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -3633,24 +3633,24 @@ define void @v_shuffle_v3i64_v4i64__7_4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v12 -; GFX940-NEXT: v_mov_b32_e32 v1, v13 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v12 +; GFX942-NEXT: v_mov_b32_e32 v1, v13 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -3693,22 +3693,22 @@ define void @v_shuffle_v3i64_v4i64__7_5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v6, v12 -; GFX940-NEXT: v_mov_b32_e32 v7, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v6, v12 +; GFX942-NEXT: v_mov_b32_e32 v7, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -3751,22 +3751,22 @@ define void @v_shuffle_v3i64_v4i64__7_6_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v8, v12 -; GFX940-NEXT: v_mov_b32_e32 v9, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v8, v12 +; GFX942-NEXT: v_mov_b32_e32 v9, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -3799,17 +3799,17 @@ define void @v_shuffle_v3i64_v4i64__u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3845,19 +3845,19 @@ define void @v_shuffle_v3i64_v4i64__0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3893,19 +3893,19 @@ define void @v_shuffle_v3i64_v4i64__1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3937,17 +3937,17 @@ define void @v_shuffle_v3i64_v4i64__2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3983,19 +3983,19 @@ define void @v_shuffle_v3i64_v4i64__3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -4027,17 +4027,17 @@ define void @v_shuffle_v3i64_v4i64__4_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -4079,22 +4079,22 @@ define void @v_shuffle_v3i64_v4i64__5_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v12, v6 -; GFX940-NEXT: v_mov_b32_e32 v13, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v12, v6 +; GFX942-NEXT: v_mov_b32_e32 v13, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -4137,22 +4137,22 @@ define void @v_shuffle_v3i64_v4i64__6_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__6_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v14, v6 -; GFX940-NEXT: v_mov_b32_e32 v15, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__6_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v14, v6 +; GFX942-NEXT: v_mov_b32_e32 v15, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -4195,22 +4195,22 @@ define void @v_shuffle_v3i64_v4i64__7_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -4253,22 +4253,22 @@ define void @v_shuffle_v3i64_v4i64__7_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v14 -; GFX940-NEXT: v_mov_b32_e32 v1, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v14 +; GFX942-NEXT: v_mov_b32_e32 v1, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -4315,24 +4315,24 @@ define void @v_shuffle_v3i64_v4i64__7_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v14 -; GFX940-NEXT: v_mov_b32_e32 v3, v15 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v14 +; GFX942-NEXT: v_mov_b32_e32 v3, v15 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -4375,22 +4375,22 @@ define void @v_shuffle_v3i64_v4i64__7_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v14 -; GFX940-NEXT: v_mov_b32_e32 v1, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v14 +; GFX942-NEXT: v_mov_b32_e32 v1, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -4433,22 +4433,22 @@ define void @v_shuffle_v3i64_v4i64__7_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v14 -; GFX940-NEXT: v_mov_b32_e32 v3, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v14 +; GFX942-NEXT: v_mov_b32_e32 v3, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -4495,24 +4495,24 @@ define void @v_shuffle_v3i64_v4i64__7_4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v14 -; GFX940-NEXT: v_mov_b32_e32 v1, v15 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v14 +; GFX942-NEXT: v_mov_b32_e32 v1, v15 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -4555,22 +4555,22 @@ define void @v_shuffle_v3i64_v4i64__7_5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v14 -; GFX940-NEXT: v_mov_b32_e32 v9, v15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v14 +; GFX942-NEXT: v_mov_b32_e32 v9, v15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -4613,22 +4613,22 @@ define void @v_shuffle_v3i64_v4i64__7_6_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v14 -; GFX940-NEXT: v_mov_b32_e32 v11, v15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v14 +; GFX942-NEXT: v_mov_b32_e32 v11, v15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -4672,17 +4672,17 @@ define void @v_shuffle_v3i64_v4i64__0_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -4712,16 +4712,16 @@ define void @v_shuffle_v3i64_v4i64__1_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -4751,16 +4751,16 @@ define void @v_shuffle_v3i64_v4i64__2_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -4794,18 +4794,18 @@ define void @v_shuffle_v3i64_v4i64__3_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -4852,19 +4852,19 @@ define void @v_shuffle_v3i64_v4i64__5_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -4901,19 +4901,19 @@ define void @v_shuffle_v3i64_v4i64__6_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__6_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__6_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -4954,21 +4954,21 @@ define void @v_shuffle_v3i64_v4i64__7_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -5005,19 +5005,19 @@ define void @v_shuffle_v3i64_v4i64__7_u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -5064,24 +5064,24 @@ define void @v_shuffle_v3i64_v4i64__7_0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -5124,22 +5124,22 @@ define void @v_shuffle_v3i64_v4i64__7_1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -5182,22 +5182,22 @@ define void @v_shuffle_v3i64_v4i64__7_2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v14, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v14, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -5240,22 +5240,22 @@ define void @v_shuffle_v3i64_v4i64__7_3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[8:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[8:9], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -5292,19 +5292,19 @@ define void @v_shuffle_v3i64_v4i64__7_5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -5341,19 +5341,19 @@ define void @v_shuffle_v3i64_v4i64__7_6_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -5386,17 +5386,17 @@ define void @v_shuffle_v3i64_v4i64__u_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -5439,22 +5439,22 @@ define void @v_shuffle_v3i64_v4i64__0_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -5497,22 +5497,22 @@ define void @v_shuffle_v3i64_v4i64__1_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -5555,22 +5555,22 @@ define void @v_shuffle_v3i64_v4i64__2_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -5613,22 +5613,22 @@ define void @v_shuffle_v3i64_v4i64__3_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[10:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v8, v6 -; GFX940-NEXT: v_mov_b32_e32 v9, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[10:11], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v8, v6 +; GFX942-NEXT: v_mov_b32_e32 v9, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -5661,17 +5661,17 @@ define void @v_shuffle_v3i64_v4i64__4_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -5708,19 +5708,19 @@ define void @v_shuffle_v3i64_v4i64__5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -5757,19 +5757,19 @@ define void @v_shuffle_v3i64_v4i64__6_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__6_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__6_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -5806,19 +5806,19 @@ define void @v_shuffle_v3i64_v4i64__7_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -5855,19 +5855,19 @@ define void @v_shuffle_v3i64_v4i64__7_u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -5914,24 +5914,24 @@ define void @v_shuffle_v3i64_v4i64__7_0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -5974,22 +5974,22 @@ define void @v_shuffle_v3i64_v4i64__7_1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -6032,22 +6032,22 @@ define void @v_shuffle_v3i64_v4i64__7_2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -6090,22 +6090,22 @@ define void @v_shuffle_v3i64_v4i64__7_3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[10:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[10:11], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -6146,21 +6146,21 @@ define void @v_shuffle_v3i64_v4i64__7_4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -6197,19 +6197,19 @@ define void @v_shuffle_v3i64_v4i64__7_6_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -6242,17 +6242,17 @@ define void @v_shuffle_v3i64_v4i64__u_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__u_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__u_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -6295,22 +6295,22 @@ define void @v_shuffle_v3i64_v4i64__0_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__0_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__0_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -6353,22 +6353,22 @@ define void @v_shuffle_v3i64_v4i64__1_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__1_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v8 -; GFX940-NEXT: v_mov_b32_e32 v5, v9 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__1_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v8 +; GFX942-NEXT: v_mov_b32_e32 v5, v9 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -6411,22 +6411,22 @@ define void @v_shuffle_v3i64_v4i64__2_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__2_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v14, v[10:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v6, v10 -; GFX940-NEXT: v_mov_b32_e32 v7, v11 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__2_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v14, v[10:11], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v6, v10 +; GFX942-NEXT: v_mov_b32_e32 v7, v11 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -6469,22 +6469,22 @@ define void @v_shuffle_v3i64_v4i64__3_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__3_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[12:13], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v10, v6 -; GFX940-NEXT: v_mov_b32_e32 v11, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__3_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[12:13], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v10, v6 +; GFX942-NEXT: v_mov_b32_e32 v11, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -6521,19 +6521,19 @@ define void @v_shuffle_v3i64_v4i64__4_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__4_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__4_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -6566,17 +6566,17 @@ define void @v_shuffle_v3i64_v4i64__5_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__5_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__5_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -6613,19 +6613,19 @@ define void @v_shuffle_v3i64_v4i64__6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -6662,19 +6662,19 @@ define void @v_shuffle_v3i64_v4i64__7_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -6711,19 +6711,19 @@ define void @v_shuffle_v3i64_v4i64__7_u_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -6770,24 +6770,24 @@ define void @v_shuffle_v3i64_v4i64__7_0_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -6830,22 +6830,22 @@ define void @v_shuffle_v3i64_v4i64__7_1_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -6888,22 +6888,22 @@ define void @v_shuffle_v3i64_v4i64__7_2_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v14, v[10:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v14, v[10:11], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -6946,22 +6946,22 @@ define void @v_shuffle_v3i64_v4i64__7_3_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[12:13], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[12:13], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -7002,21 +7002,21 @@ define void @v_shuffle_v3i64_v4i64__7_4_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -7053,19 +7053,19 @@ define void @v_shuffle_v3i64_v4i64__7_5_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -7098,17 +7098,17 @@ define void @v_shuffle_v3i64_v4i64__u_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__u_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__u_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -7151,22 +7151,22 @@ define void @v_shuffle_v3i64_v4i64__0_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__0_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__0_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -7209,22 +7209,22 @@ define void @v_shuffle_v3i64_v4i64__1_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__1_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v10 -; GFX940-NEXT: v_mov_b32_e32 v5, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__1_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v10 +; GFX942-NEXT: v_mov_b32_e32 v5, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -7267,22 +7267,22 @@ define void @v_shuffle_v3i64_v4i64__2_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__2_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v14, v[12:13], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v6, v12 -; GFX940-NEXT: v_mov_b32_e32 v7, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__2_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v14, v[12:13], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v6, v12 +; GFX942-NEXT: v_mov_b32_e32 v7, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -7325,22 +7325,22 @@ define void @v_shuffle_v3i64_v4i64__3_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__3_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[14:15], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v12, v6 -; GFX940-NEXT: v_mov_b32_e32 v13, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__3_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[14:15], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v12, v6 +; GFX942-NEXT: v_mov_b32_e32 v13, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -7377,19 +7377,19 @@ define void @v_shuffle_v3i64_v4i64__4_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__4_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__4_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -7426,19 +7426,19 @@ define void @v_shuffle_v3i64_v4i64__5_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__5_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__5_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -7471,17 +7471,17 @@ define void @v_shuffle_v3i64_v4i64__6_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__6_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__6_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -7518,19 +7518,19 @@ define void @v_shuffle_v3i64_v4i64__7_u_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -7577,24 +7577,24 @@ define void @v_shuffle_v3i64_v4i64__7_0_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -7637,22 +7637,22 @@ define void @v_shuffle_v3i64_v4i64__7_1_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -7695,22 +7695,22 @@ define void @v_shuffle_v3i64_v4i64__7_2_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v14, v[12:13], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v14, v[12:13], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -7753,22 +7753,22 @@ define void @v_shuffle_v3i64_v4i64__7_3_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[14:15], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[14:15], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -7809,21 +7809,21 @@ define void @v_shuffle_v3i64_v4i64__7_4_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -7860,19 +7860,19 @@ define void @v_shuffle_v3i64_v4i64__7_5_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -7909,19 +7909,19 @@ define void @v_shuffle_v3i64_v4i64__7_6_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3i64_v4i64__7_6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -7966,17 +7966,17 @@ define void @s_shuffle_v3i64_v4i64__0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -8010,18 +8010,18 @@ define void @s_shuffle_v3i64_v4i64__1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -8051,18 +8051,18 @@ define void @s_shuffle_v3i64_v4i64__2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -8096,18 +8096,18 @@ define void @s_shuffle_v3i64_v4i64__3_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -8155,18 +8155,18 @@ define void @s_shuffle_v3i64_v4i64__5_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -8197,18 +8197,18 @@ define void @s_shuffle_v3i64_v4i64__6_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__6_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -8243,18 +8243,18 @@ define void @s_shuffle_v3i64_v4i64__7_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -8299,24 +8299,24 @@ define void @s_shuffle_v3i64_v4i64__7_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -8357,21 +8357,21 @@ define void @s_shuffle_v3i64_v4i64__7_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -8416,23 +8416,23 @@ define void @s_shuffle_v3i64_v4i64__7_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -8473,23 +8473,23 @@ define void @s_shuffle_v3i64_v4i64__7_3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -8528,20 +8528,20 @@ define void @s_shuffle_v3i64_v4i64__7_4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -8600,20 +8600,20 @@ define void @s_shuffle_v3i64_v4i64__7_6_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -8648,20 +8648,20 @@ define void @s_shuffle_v3i64_v4i64__7_7_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -8710,23 +8710,23 @@ define void @s_shuffle_v3i64_v4i64__7_7_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -8775,23 +8775,23 @@ define void @s_shuffle_v3i64_v4i64__7_7_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -8832,23 +8832,23 @@ define void @s_shuffle_v3i64_v4i64__7_7_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -8893,25 +8893,25 @@ define void @s_shuffle_v3i64_v4i64__7_7_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -8950,22 +8950,22 @@ define void @s_shuffle_v3i64_v4i64__7_7_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -9004,22 +9004,22 @@ define void @s_shuffle_v3i64_v4i64__7_7_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -9084,22 +9084,22 @@ define void @s_shuffle_v3i64_v4i64__7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -9138,20 +9138,20 @@ define void @s_shuffle_v3i64_v4i64__u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -9214,22 +9214,22 @@ define void @s_shuffle_v3i64_v4i64__1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -9267,22 +9267,22 @@ define void @s_shuffle_v3i64_v4i64__2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -9324,22 +9324,22 @@ define void @s_shuffle_v3i64_v4i64__3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -9377,20 +9377,20 @@ define void @s_shuffle_v3i64_v4i64__4_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -9438,25 +9438,25 @@ define void @s_shuffle_v3i64_v4i64__5_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -9505,23 +9505,23 @@ define void @s_shuffle_v3i64_v4i64__6_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__6_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -9570,25 +9570,25 @@ define void @s_shuffle_v3i64_v4i64__7_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -9633,23 +9633,23 @@ define void @s_shuffle_v3i64_v4i64__7_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -9698,25 +9698,25 @@ define void @s_shuffle_v3i64_v4i64__7_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -9761,25 +9761,25 @@ define void @s_shuffle_v3i64_v4i64__7_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -9824,25 +9824,25 @@ define void @s_shuffle_v3i64_v4i64__7_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -9891,25 +9891,25 @@ define void @s_shuffle_v3i64_v4i64__7_4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -9954,23 +9954,23 @@ define void @s_shuffle_v3i64_v4i64__7_5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -10019,25 +10019,25 @@ define void @s_shuffle_v3i64_v4i64__7_6_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -10202,23 +10202,23 @@ define void @s_shuffle_v3i64_v4i64__5_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -10263,23 +10263,23 @@ define void @s_shuffle_v3i64_v4i64__6_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__6_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -10324,23 +10324,23 @@ define void @s_shuffle_v3i64_v4i64__7_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -10385,23 +10385,23 @@ define void @s_shuffle_v3i64_v4i64__7_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -10450,25 +10450,25 @@ define void @s_shuffle_v3i64_v4i64__7_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -10517,25 +10517,25 @@ define void @s_shuffle_v3i64_v4i64__7_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -10580,25 +10580,25 @@ define void @s_shuffle_v3i64_v4i64__7_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -10647,25 +10647,25 @@ define void @s_shuffle_v3i64_v4i64__7_4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -10710,23 +10710,23 @@ define void @s_shuffle_v3i64_v4i64__7_5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -10775,25 +10775,25 @@ define void @s_shuffle_v3i64_v4i64__7_6_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -10958,23 +10958,23 @@ define void @s_shuffle_v3i64_v4i64__5_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -11015,23 +11015,23 @@ define void @s_shuffle_v3i64_v4i64__6_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__6_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -11076,23 +11076,23 @@ define void @s_shuffle_v3i64_v4i64__7_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -11133,21 +11133,21 @@ define void @s_shuffle_v3i64_v4i64__7_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -11196,25 +11196,25 @@ define void @s_shuffle_v3i64_v4i64__7_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -11255,21 +11255,21 @@ define void @s_shuffle_v3i64_v4i64__7_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -11314,23 +11314,23 @@ define void @s_shuffle_v3i64_v4i64__7_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -11375,23 +11375,23 @@ define void @s_shuffle_v3i64_v4i64__7_4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -11436,23 +11436,23 @@ define void @s_shuffle_v3i64_v4i64__7_5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -11497,23 +11497,23 @@ define void @s_shuffle_v3i64_v4i64__7_6_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -11552,20 +11552,20 @@ define void @s_shuffle_v3i64_v4i64__u_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -11628,22 +11628,22 @@ define void @s_shuffle_v3i64_v4i64__1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -11685,22 +11685,22 @@ define void @s_shuffle_v3i64_v4i64__2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -11742,22 +11742,22 @@ define void @s_shuffle_v3i64_v4i64__3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -11795,20 +11795,20 @@ define void @s_shuffle_v3i64_v4i64__4_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -11856,25 +11856,25 @@ define void @s_shuffle_v3i64_v4i64__5_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -11919,25 +11919,25 @@ define void @s_shuffle_v3i64_v4i64__6_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__6_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -11986,25 +11986,25 @@ define void @s_shuffle_v3i64_v4i64__7_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -12049,23 +12049,23 @@ define void @s_shuffle_v3i64_v4i64__7_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -12114,25 +12114,25 @@ define void @s_shuffle_v3i64_v4i64__7_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -12177,23 +12177,23 @@ define void @s_shuffle_v3i64_v4i64__7_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -12242,25 +12242,25 @@ define void @s_shuffle_v3i64_v4i64__7_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -12309,25 +12309,25 @@ define void @s_shuffle_v3i64_v4i64__7_4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s18 -; GFX940-NEXT: s_mov_b32 s9, s19 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -12372,23 +12372,23 @@ define void @s_shuffle_v3i64_v4i64__7_5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -12437,25 +12437,25 @@ define void @s_shuffle_v3i64_v4i64__7_6_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -12500,17 +12500,17 @@ define void @s_shuffle_v3i64_v4i64__0_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -12544,18 +12544,18 @@ define void @s_shuffle_v3i64_v4i64__1_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -12585,18 +12585,18 @@ define void @s_shuffle_v3i64_v4i64__2_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -12630,18 +12630,18 @@ define void @s_shuffle_v3i64_v4i64__3_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -12697,22 +12697,22 @@ define void @s_shuffle_v3i64_v4i64__5_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -12751,22 +12751,22 @@ define void @s_shuffle_v3i64_v4i64__6_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__6_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -12809,22 +12809,22 @@ define void @s_shuffle_v3i64_v4i64__7_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -12863,20 +12863,20 @@ define void @s_shuffle_v3i64_v4i64__7_u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -12921,26 +12921,26 @@ define void @s_shuffle_v3i64_v4i64__7_0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -12981,23 +12981,23 @@ define void @s_shuffle_v3i64_v4i64__7_1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -13046,23 +13046,23 @@ define void @s_shuffle_v3i64_v4i64__7_2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s18 -; GFX940-NEXT: s_mov_b32 s9, s19 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -13103,23 +13103,23 @@ define void @s_shuffle_v3i64_v4i64__7_3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s18 -; GFX940-NEXT: s_mov_b32 s9, s19 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -13162,22 +13162,22 @@ define void @s_shuffle_v3i64_v4i64__7_5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -13216,22 +13216,22 @@ define void @s_shuffle_v3i64_v4i64__7_6_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -13296,23 +13296,23 @@ define void @s_shuffle_v3i64_v4i64__0_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -13357,23 +13357,23 @@ define void @s_shuffle_v3i64_v4i64__1_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -13418,23 +13418,23 @@ define void @s_shuffle_v3i64_v4i64__2_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -13479,23 +13479,23 @@ define void @s_shuffle_v3i64_v4i64__3_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -13620,20 +13620,20 @@ define void @s_shuffle_v3i64_v4i64__7_u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -13682,26 +13682,26 @@ define void @s_shuffle_v3i64_v4i64__7_0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -13746,23 +13746,23 @@ define void @s_shuffle_v3i64_v4i64__7_1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -13811,25 +13811,25 @@ define void @s_shuffle_v3i64_v4i64__7_2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s18 -; GFX940-NEXT: s_mov_b32 s9, s19 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -13874,25 +13874,25 @@ define void @s_shuffle_v3i64_v4i64__7_3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s18 -; GFX940-NEXT: s_mov_b32 s9, s19 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -13935,22 +13935,22 @@ define void @s_shuffle_v3i64_v4i64__7_4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -13993,22 +13993,22 @@ define void @s_shuffle_v3i64_v4i64__7_6_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -14073,23 +14073,23 @@ define void @s_shuffle_v3i64_v4i64__0_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__0_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__0_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -14134,23 +14134,23 @@ define void @s_shuffle_v3i64_v4i64__1_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__1_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -14195,23 +14195,23 @@ define void @s_shuffle_v3i64_v4i64__2_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__2_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -14256,23 +14256,23 @@ define void @s_shuffle_v3i64_v4i64__3_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__3_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -14423,23 +14423,23 @@ define void @s_shuffle_v3i64_v4i64__7_0_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -14484,23 +14484,23 @@ define void @s_shuffle_v3i64_v4i64__7_1_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -14545,23 +14545,23 @@ define void @s_shuffle_v3i64_v4i64__7_2_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -14606,23 +14606,23 @@ define void @s_shuffle_v3i64_v4i64__7_3_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -14665,22 +14665,22 @@ define void @s_shuffle_v3i64_v4i64__7_4_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -14739,20 +14739,20 @@ define void @s_shuffle_v3i64_v4i64__u_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__u_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__u_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -14797,23 +14797,23 @@ define void @s_shuffle_v3i64_v4i64__0_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__0_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__0_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -14862,25 +14862,25 @@ define void @s_shuffle_v3i64_v4i64__1_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__1_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -14925,25 +14925,25 @@ define void @s_shuffle_v3i64_v4i64__2_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__2_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -14992,25 +14992,25 @@ define void @s_shuffle_v3i64_v4i64__3_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__3_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -15075,22 +15075,22 @@ define void @s_shuffle_v3i64_v4i64__5_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__5_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -15133,22 +15133,22 @@ define void @s_shuffle_v3i64_v4i64__6_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__6_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -15187,20 +15187,20 @@ define void @s_shuffle_v3i64_v4i64__7_u_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -15249,25 +15249,25 @@ define void @s_shuffle_v3i64_v4i64__7_0_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -15312,23 +15312,23 @@ define void @s_shuffle_v3i64_v4i64__7_1_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -15377,25 +15377,25 @@ define void @s_shuffle_v3i64_v4i64__7_2_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -15440,25 +15440,25 @@ define void @s_shuffle_v3i64_v4i64__7_3_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -15501,22 +15501,22 @@ define void @s_shuffle_v3i64_v4i64__7_4_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -15581,22 +15581,22 @@ define void @s_shuffle_v3i64_v4i64__7_6_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3i64_v4i64__7_6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v2p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v2p0.ll index 3075bfab1dc85..fe132493ce536 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v2p0.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v2p0.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v3p0_v2p0__u_u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v3p0_v2p0__0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -81,18 +81,18 @@ define void @v_shuffle_v3p0_v2p0__1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -137,18 +137,18 @@ define void @v_shuffle_v3p0_v2p0__3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -193,24 +193,24 @@ define void @v_shuffle_v3p0_v2p0__3_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__3_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -251,22 +251,22 @@ define void @v_shuffle_v3p0_v2p0__3_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__3_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -301,19 +301,19 @@ define void @v_shuffle_v3p0_v2p0__3_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__3_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -348,18 +348,18 @@ define void @v_shuffle_v3p0_v2p0__3_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__3_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -402,22 +402,22 @@ define void @v_shuffle_v3p0_v2p0__3_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__3_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -460,22 +460,22 @@ define void @v_shuffle_v3p0_v2p0__3_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__3_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -512,19 +512,19 @@ define void @v_shuffle_v3p0_v2p0__3_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__3_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -561,19 +561,19 @@ define void @v_shuffle_v3p0_v2p0__3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -610,19 +610,19 @@ define void @v_shuffle_v3p0_v2p0__u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -658,19 +658,19 @@ define void @v_shuffle_v3p0_v2p0__0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <3 x i32> zeroinitializer store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -706,19 +706,19 @@ define void @v_shuffle_v3p0_v2p0__1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -754,19 +754,19 @@ define void @v_shuffle_v3p0_v2p0__2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -812,24 +812,24 @@ define void @v_shuffle_v3p0_v2p0__3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -872,23 +872,23 @@ define void @v_shuffle_v3p0_v2p0__3_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__3_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -931,22 +931,22 @@ define void @v_shuffle_v3p0_v2p0__3_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__3_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -989,23 +989,23 @@ define void @v_shuffle_v3p0_v2p0__3_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__3_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -1038,17 +1038,17 @@ define void @v_shuffle_v3p0_v2p0__u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1080,17 +1080,17 @@ define void @v_shuffle_v3p0_v2p0__0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1126,19 +1126,19 @@ define void @v_shuffle_v3p0_v2p0__1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1170,17 +1170,17 @@ define void @v_shuffle_v3p0_v2p0__2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1222,22 +1222,22 @@ define void @v_shuffle_v3p0_v2p0__3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -1280,22 +1280,22 @@ define void @v_shuffle_v3p0_v2p0__3_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__3_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -1342,24 +1342,24 @@ define void @v_shuffle_v3p0_v2p0__3_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__3_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -1406,24 +1406,24 @@ define void @v_shuffle_v3p0_v2p0__3_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__3_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -1465,16 +1465,16 @@ define void @v_shuffle_v3p0_v2p0__0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1508,18 +1508,18 @@ define void @v_shuffle_v3p0_v2p0__1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1566,19 +1566,19 @@ define void @v_shuffle_v3p0_v2p0__3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -1615,19 +1615,19 @@ define void @v_shuffle_v3p0_v2p0__3_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__3_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -1674,24 +1674,24 @@ define void @v_shuffle_v3p0_v2p0__3_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__3_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -1734,22 +1734,22 @@ define void @v_shuffle_v3p0_v2p0__3_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__3_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -1782,17 +1782,17 @@ define void @v_shuffle_v3p0_v2p0__u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -1835,22 +1835,22 @@ define void @v_shuffle_v3p0_v2p0__0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -1893,22 +1893,22 @@ define void @v_shuffle_v3p0_v2p0__1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -1941,17 +1941,17 @@ define void @v_shuffle_v3p0_v2p0__2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -1988,19 +1988,19 @@ define void @v_shuffle_v3p0_v2p0__3_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__3_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -2047,24 +2047,24 @@ define void @v_shuffle_v3p0_v2p0__3_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__3_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -2107,22 +2107,22 @@ define void @v_shuffle_v3p0_v2p0__3_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__3_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -2159,19 +2159,19 @@ define void @v_shuffle_v3p0_v2p0__3_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v2p0__3_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -2216,17 +2216,17 @@ define void @s_shuffle_v3p0_v2p0__0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v2p0__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v2p0__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -2260,18 +2260,18 @@ define void @s_shuffle_v3p0_v2p0__1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v2p0__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v2p0__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -2319,18 +2319,18 @@ define void @s_shuffle_v3p0_v2p0__3_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v2p0__3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -2375,23 +2375,23 @@ define void @s_shuffle_v3p0_v2p0__3_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v2p0__3_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -2432,21 +2432,21 @@ define void @s_shuffle_v3p0_v2p0__3_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v2p0__3_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -2485,20 +2485,20 @@ define void @s_shuffle_v3p0_v2p0__3_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v2p0__3_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -2563,23 +2563,23 @@ define void @s_shuffle_v3p0_v2p0__3_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v2p0__3_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -2624,23 +2624,23 @@ define void @s_shuffle_v3p0_v2p0__3_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v2p0__3_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -2683,22 +2683,22 @@ define void @s_shuffle_v3p0_v2p0__3_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v2p0__3_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -2759,20 +2759,20 @@ define void @s_shuffle_v3p0_v2p0__u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v2p0__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v2p0__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -2835,22 +2835,22 @@ define void @s_shuffle_v3p0_v2p0__1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v2p0__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v2p0__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -2888,20 +2888,20 @@ define void @s_shuffle_v3p0_v2p0__2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v2p0__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v2p0__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -2949,25 +2949,25 @@ define void @s_shuffle_v3p0_v2p0__3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v2p0__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -3012,23 +3012,23 @@ define void @s_shuffle_v3p0_v2p0__3_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v2p0__3_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -3077,25 +3077,25 @@ define void @s_shuffle_v3p0_v2p0__3_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v2p0__3_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -3144,25 +3144,25 @@ define void @s_shuffle_v3p0_v2p0__3_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v2p0__3_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -3285,23 +3285,23 @@ define void @s_shuffle_v3p0_v2p0__3_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v2p0__3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -3346,23 +3346,23 @@ define void @s_shuffle_v3p0_v2p0__3_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v2p0__3_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -3411,25 +3411,25 @@ define void @s_shuffle_v3p0_v2p0__3_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v2p0__3_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -3478,25 +3478,25 @@ define void @s_shuffle_v3p0_v2p0__3_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v2p0__3_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -3541,17 +3541,17 @@ define void @s_shuffle_v3p0_v2p0__0_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v2p0__0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v2p0__0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -3585,18 +3585,18 @@ define void @s_shuffle_v3p0_v2p0__1_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v2p0__1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v2p0__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -3652,22 +3652,22 @@ define void @s_shuffle_v3p0_v2p0__3_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v2p0__3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -3706,20 +3706,20 @@ define void @s_shuffle_v3p0_v2p0__3_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v2p0__3_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -3764,25 +3764,25 @@ define void @s_shuffle_v3p0_v2p0__3_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v2p0__3_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -3827,23 +3827,23 @@ define void @s_shuffle_v3p0_v2p0__3_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v2p0__3_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -3908,23 +3908,23 @@ define void @s_shuffle_v3p0_v2p0__0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v2p0__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v2p0__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -3969,23 +3969,23 @@ define void @s_shuffle_v3p0_v2p0__1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v2p0__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v2p0__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -4044,20 +4044,20 @@ define void @s_shuffle_v3p0_v2p0__3_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v2p0__3_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -4106,25 +4106,25 @@ define void @s_shuffle_v3p0_v2p0__3_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v2p0__3_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -4169,23 +4169,23 @@ define void @s_shuffle_v3p0_v2p0__3_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v2p0__3_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -4228,22 +4228,22 @@ define void @s_shuffle_v3p0_v2p0__3_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v2p0__3_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v3p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v3p0.ll index a33bb00a738e6..b6f4e3091b61f 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v3p0.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v3p0.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v3p0_v3p0__u_u_u(ptr addrspace(1) inreg %ptr) { @@ -40,17 +40,17 @@ define void @v_shuffle_v3p0_v3p0__0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -80,16 +80,16 @@ define void @v_shuffle_v3p0_v3p0__1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -123,18 +123,18 @@ define void @v_shuffle_v3p0_v3p0__2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -175,16 +175,16 @@ define void @v_shuffle_v3p0_v3p0__4_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -219,18 +219,18 @@ define void @v_shuffle_v3p0_v3p0__5_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -275,24 +275,24 @@ define void @v_shuffle_v3p0_v3p0__5_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -333,22 +333,22 @@ define void @v_shuffle_v3p0_v3p0__5_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -389,22 +389,22 @@ define void @v_shuffle_v3p0_v3p0__5_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -443,20 +443,20 @@ define void @v_shuffle_v3p0_v3p0__5_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -491,18 +491,18 @@ define void @v_shuffle_v3p0_v3p0__5_4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -537,18 +537,18 @@ define void @v_shuffle_v3p0_v3p0__5_5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -591,22 +591,22 @@ define void @v_shuffle_v3p0_v3p0__5_5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -649,22 +649,22 @@ define void @v_shuffle_v3p0_v3p0__5_5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -707,22 +707,22 @@ define void @v_shuffle_v3p0_v3p0__5_5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -759,19 +759,19 @@ define void @v_shuffle_v3p0_v3p0__5_5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -808,19 +808,19 @@ define void @v_shuffle_v3p0_v3p0__5_5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -857,19 +857,19 @@ define void @v_shuffle_v3p0_v3p0__5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -906,19 +906,19 @@ define void @v_shuffle_v3p0_v3p0__u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -954,19 +954,19 @@ define void @v_shuffle_v3p0_v3p0__0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> zeroinitializer store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1002,19 +1002,19 @@ define void @v_shuffle_v3p0_v3p0__1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1054,21 +1054,21 @@ define void @v_shuffle_v3p0_v3p0__2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1104,19 +1104,19 @@ define void @v_shuffle_v3p0_v3p0__3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1158,22 +1158,22 @@ define void @v_shuffle_v3p0_v3p0__4_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -1220,24 +1220,24 @@ define void @v_shuffle_v3p0_v3p0__5_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -1280,23 +1280,23 @@ define void @v_shuffle_v3p0_v3p0__5_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -1339,23 +1339,23 @@ define void @v_shuffle_v3p0_v3p0__5_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -1398,22 +1398,22 @@ define void @v_shuffle_v3p0_v3p0__5_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -1456,23 +1456,23 @@ define void @v_shuffle_v3p0_v3p0__5_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -1515,22 +1515,22 @@ define void @v_shuffle_v3p0_v3p0__5_4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -1563,17 +1563,17 @@ define void @v_shuffle_v3p0_v3p0__u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1605,17 +1605,17 @@ define void @v_shuffle_v3p0_v3p0__0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1651,19 +1651,19 @@ define void @v_shuffle_v3p0_v3p0__1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1699,19 +1699,19 @@ define void @v_shuffle_v3p0_v3p0__2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1743,17 +1743,17 @@ define void @v_shuffle_v3p0_v3p0__3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1795,22 +1795,22 @@ define void @v_shuffle_v3p0_v3p0__4_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v8, v2 -; GFX940-NEXT: v_mov_b32_e32 v9, v3 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v8, v2 +; GFX942-NEXT: v_mov_b32_e32 v9, v3 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -1853,22 +1853,22 @@ define void @v_shuffle_v3p0_v3p0__5_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -1911,22 +1911,22 @@ define void @v_shuffle_v3p0_v3p0__5_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -1973,24 +1973,24 @@ define void @v_shuffle_v3p0_v3p0__5_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -2033,22 +2033,22 @@ define void @v_shuffle_v3p0_v3p0__5_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -2095,25 +2095,25 @@ define void @v_shuffle_v3p0_v3p0__5_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -2156,22 +2156,22 @@ define void @v_shuffle_v3p0_v3p0__5_4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v8 -; GFX940-NEXT: v_mov_b32_e32 v5, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v8 +; GFX942-NEXT: v_mov_b32_e32 v5, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -2204,17 +2204,17 @@ define void @v_shuffle_v3p0_v3p0__u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2250,19 +2250,19 @@ define void @v_shuffle_v3p0_v3p0__0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2294,17 +2294,17 @@ define void @v_shuffle_v3p0_v3p0__1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2340,19 +2340,19 @@ define void @v_shuffle_v3p0_v3p0__2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2384,17 +2384,17 @@ define void @v_shuffle_v3p0_v3p0__3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2436,22 +2436,22 @@ define void @v_shuffle_v3p0_v3p0__4_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v10, v4 -; GFX940-NEXT: v_mov_b32_e32 v11, v5 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v10, v4 +; GFX942-NEXT: v_mov_b32_e32 v11, v5 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -2494,22 +2494,22 @@ define void @v_shuffle_v3p0_v3p0__5_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -2552,22 +2552,22 @@ define void @v_shuffle_v3p0_v3p0__5_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -2614,24 +2614,24 @@ define void @v_shuffle_v3p0_v3p0__5_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -2674,22 +2674,22 @@ define void @v_shuffle_v3p0_v3p0__5_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -2736,24 +2736,24 @@ define void @v_shuffle_v3p0_v3p0__5_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -2796,22 +2796,22 @@ define void @v_shuffle_v3p0_v3p0__5_4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v10 -; GFX940-NEXT: v_mov_b32_e32 v7, v11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v10 +; GFX942-NEXT: v_mov_b32_e32 v7, v11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -2855,17 +2855,17 @@ define void @v_shuffle_v3p0_v3p0__0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2895,16 +2895,16 @@ define void @v_shuffle_v3p0_v3p0__1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2938,18 +2938,18 @@ define void @v_shuffle_v3p0_v3p0__2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2996,19 +2996,19 @@ define void @v_shuffle_v3p0_v3p0__4_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -3049,21 +3049,21 @@ define void @v_shuffle_v3p0_v3p0__5_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -3100,19 +3100,19 @@ define void @v_shuffle_v3p0_v3p0__5_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -3159,24 +3159,24 @@ define void @v_shuffle_v3p0_v3p0__5_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -3219,22 +3219,22 @@ define void @v_shuffle_v3p0_v3p0__5_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -3277,22 +3277,22 @@ define void @v_shuffle_v3p0_v3p0__5_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -3329,19 +3329,19 @@ define void @v_shuffle_v3p0_v3p0__5_4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -3374,17 +3374,17 @@ define void @v_shuffle_v3p0_v3p0__u_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -3427,22 +3427,22 @@ define void @v_shuffle_v3p0_v3p0__0_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -3485,22 +3485,22 @@ define void @v_shuffle_v3p0_v3p0__1_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -3543,22 +3543,22 @@ define void @v_shuffle_v3p0_v3p0__2_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -3591,17 +3591,17 @@ define void @v_shuffle_v3p0_v3p0__3_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -3638,19 +3638,19 @@ define void @v_shuffle_v3p0_v3p0__4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -3687,19 +3687,19 @@ define void @v_shuffle_v3p0_v3p0__5_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -3736,19 +3736,19 @@ define void @v_shuffle_v3p0_v3p0__5_u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -3795,24 +3795,24 @@ define void @v_shuffle_v3p0_v3p0__5_0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -3855,22 +3855,22 @@ define void @v_shuffle_v3p0_v3p0__5_1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -3913,22 +3913,22 @@ define void @v_shuffle_v3p0_v3p0__5_2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -3969,21 +3969,21 @@ define void @v_shuffle_v3p0_v3p0__5_3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -4016,17 +4016,17 @@ define void @v_shuffle_v3p0_v3p0__u_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -4069,22 +4069,22 @@ define void @v_shuffle_v3p0_v3p0__0_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -4127,22 +4127,22 @@ define void @v_shuffle_v3p0_v3p0__1_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v8 -; GFX940-NEXT: v_mov_b32_e32 v5, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v8 +; GFX942-NEXT: v_mov_b32_e32 v5, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -4185,22 +4185,22 @@ define void @v_shuffle_v3p0_v3p0__2_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v8, v4 -; GFX940-NEXT: v_mov_b32_e32 v9, v5 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v8, v4 +; GFX942-NEXT: v_mov_b32_e32 v9, v5 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -4237,19 +4237,19 @@ define void @v_shuffle_v3p0_v3p0__3_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -4282,17 +4282,17 @@ define void @v_shuffle_v3p0_v3p0__4_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -4329,19 +4329,19 @@ define void @v_shuffle_v3p0_v3p0__5_u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -4388,24 +4388,24 @@ define void @v_shuffle_v3p0_v3p0__5_0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -4448,22 +4448,22 @@ define void @v_shuffle_v3p0_v3p0__5_1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -4506,22 +4506,22 @@ define void @v_shuffle_v3p0_v3p0__5_2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -4562,21 +4562,21 @@ define void @v_shuffle_v3p0_v3p0__5_3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -4613,19 +4613,19 @@ define void @v_shuffle_v3p0_v3p0__5_4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v3p0__5_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -4670,17 +4670,17 @@ define void @s_shuffle_v3p0_v3p0__0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -4714,18 +4714,18 @@ define void @s_shuffle_v3p0_v3p0__1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -4755,18 +4755,18 @@ define void @s_shuffle_v3p0_v3p0__2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -4814,18 +4814,18 @@ define void @s_shuffle_v3p0_v3p0__4_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -4856,18 +4856,18 @@ define void @s_shuffle_v3p0_v3p0__5_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -4912,21 +4912,21 @@ define void @s_shuffle_v3p0_v3p0__5_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -4963,21 +4963,21 @@ define void @s_shuffle_v3p0_v3p0__5_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -5018,23 +5018,23 @@ define void @s_shuffle_v3p0_v3p0__5_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -5069,20 +5069,20 @@ define void @s_shuffle_v3p0_v3p0__5_3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -5141,20 +5141,20 @@ define void @s_shuffle_v3p0_v3p0__5_5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -5203,25 +5203,25 @@ define void @s_shuffle_v3p0_v3p0__5_5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -5270,25 +5270,25 @@ define void @s_shuffle_v3p0_v3p0__5_5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -5333,23 +5333,23 @@ define void @s_shuffle_v3p0_v3p0__5_5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -5388,22 +5388,22 @@ define void @s_shuffle_v3p0_v3p0__5_5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -5446,22 +5446,22 @@ define void @s_shuffle_v3p0_v3p0__5_5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -5522,20 +5522,20 @@ define void @s_shuffle_v3p0_v3p0__u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -5598,22 +5598,22 @@ define void @s_shuffle_v3p0_v3p0__1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -5651,22 +5651,22 @@ define void @s_shuffle_v3p0_v3p0__2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -5704,20 +5704,20 @@ define void @s_shuffle_v3p0_v3p0__3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -5765,25 +5765,25 @@ define void @s_shuffle_v3p0_v3p0__4_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -5832,23 +5832,23 @@ define void @s_shuffle_v3p0_v3p0__5_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -5893,21 +5893,21 @@ define void @s_shuffle_v3p0_v3p0__5_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -5956,23 +5956,23 @@ define void @s_shuffle_v3p0_v3p0__5_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -6013,25 +6013,25 @@ define void @s_shuffle_v3p0_v3p0__5_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -6080,23 +6080,23 @@ define void @s_shuffle_v3p0_v3p0__5_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -6141,23 +6141,23 @@ define void @s_shuffle_v3p0_v3p0__5_4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -6301,23 +6301,23 @@ define void @s_shuffle_v3p0_v3p0__4_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -6358,23 +6358,23 @@ define void @s_shuffle_v3p0_v3p0__5_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -6419,21 +6419,21 @@ define void @s_shuffle_v3p0_v3p0__5_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -6482,23 +6482,23 @@ define void @s_shuffle_v3p0_v3p0__5_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -6543,25 +6543,25 @@ define void @s_shuffle_v3p0_v3p0__5_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -6610,23 +6610,23 @@ define void @s_shuffle_v3p0_v3p0__5_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -6671,23 +6671,23 @@ define void @s_shuffle_v3p0_v3p0__5_4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -6831,23 +6831,23 @@ define void @s_shuffle_v3p0_v3p0__4_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -6888,23 +6888,23 @@ define void @s_shuffle_v3p0_v3p0__5_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -6941,21 +6941,21 @@ define void @s_shuffle_v3p0_v3p0__5_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -7000,25 +7000,25 @@ define void @s_shuffle_v3p0_v3p0__5_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -7055,21 +7055,21 @@ define void @s_shuffle_v3p0_v3p0__5_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -7110,23 +7110,23 @@ define void @s_shuffle_v3p0_v3p0__5_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -7171,23 +7171,23 @@ define void @s_shuffle_v3p0_v3p0__5_4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -7232,17 +7232,17 @@ define void @s_shuffle_v3p0_v3p0__0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -7276,18 +7276,18 @@ define void @s_shuffle_v3p0_v3p0__1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -7317,18 +7317,18 @@ define void @s_shuffle_v3p0_v3p0__2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -7384,22 +7384,22 @@ define void @s_shuffle_v3p0_v3p0__4_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -7438,22 +7438,22 @@ define void @s_shuffle_v3p0_v3p0__5_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -7488,20 +7488,20 @@ define void @s_shuffle_v3p0_v3p0__5_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -7546,23 +7546,23 @@ define void @s_shuffle_v3p0_v3p0__5_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -7603,23 +7603,23 @@ define void @s_shuffle_v3p0_v3p0__5_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -7664,23 +7664,23 @@ define void @s_shuffle_v3p0_v3p0__5_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s16 -; GFX940-NEXT: s_mov_b32 s9, s17 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s16 +; GFX942-NEXT: s_mov_b32 s9, s17 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -7719,22 +7719,22 @@ define void @s_shuffle_v3p0_v3p0__5_4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -7799,23 +7799,23 @@ define void @s_shuffle_v3p0_v3p0__0_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -7860,23 +7860,23 @@ define void @s_shuffle_v3p0_v3p0__1_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -7921,23 +7921,23 @@ define void @s_shuffle_v3p0_v3p0__2_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -8036,20 +8036,20 @@ define void @s_shuffle_v3p0_v3p0__5_u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -8098,23 +8098,23 @@ define void @s_shuffle_v3p0_v3p0__5_0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -8155,23 +8155,23 @@ define void @s_shuffle_v3p0_v3p0__5_1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -8216,25 +8216,25 @@ define void @s_shuffle_v3p0_v3p0__5_2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s16 -; GFX940-NEXT: s_mov_b32 s9, s17 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s16 +; GFX942-NEXT: s_mov_b32 s9, s17 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -8273,22 +8273,22 @@ define void @s_shuffle_v3p0_v3p0__5_3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -8353,23 +8353,23 @@ define void @s_shuffle_v3p0_v3p0__0_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -8414,23 +8414,23 @@ define void @s_shuffle_v3p0_v3p0__1_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -8475,23 +8475,23 @@ define void @s_shuffle_v3p0_v3p0__2_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -8598,23 +8598,23 @@ define void @s_shuffle_v3p0_v3p0__5_0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -8659,23 +8659,23 @@ define void @s_shuffle_v3p0_v3p0__5_1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -8720,23 +8720,23 @@ define void @s_shuffle_v3p0_v3p0__5_2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -8779,22 +8779,22 @@ define void @s_shuffle_v3p0_v3p0__5_3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v3p0__5_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v4p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v4p0.ll index 6c086a40c4153..b03066e66cf66 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v4p0.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v4p0.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v3p0_v4p0__u_u_u(ptr addrspace(1) inreg %ptr) { @@ -40,17 +40,17 @@ define void @v_shuffle_v3p0_v4p0__0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -80,16 +80,16 @@ define void @v_shuffle_v3p0_v4p0__1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -119,16 +119,16 @@ define void @v_shuffle_v3p0_v4p0__2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -162,18 +162,18 @@ define void @v_shuffle_v3p0_v4p0__3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -214,16 +214,16 @@ define void @v_shuffle_v3p0_v4p0__5_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -254,16 +254,16 @@ define void @v_shuffle_v3p0_v4p0__6_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__6_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__6_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -298,18 +298,18 @@ define void @v_shuffle_v3p0_v4p0__7_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -354,24 +354,24 @@ define void @v_shuffle_v3p0_v4p0__7_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -412,22 +412,22 @@ define void @v_shuffle_v3p0_v4p0__7_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -468,22 +468,22 @@ define void @v_shuffle_v3p0_v4p0__7_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -524,22 +524,22 @@ define void @v_shuffle_v3p0_v4p0__7_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -578,20 +578,20 @@ define void @v_shuffle_v3p0_v4p0__7_4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -626,18 +626,18 @@ define void @v_shuffle_v3p0_v4p0__7_5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -672,18 +672,18 @@ define void @v_shuffle_v3p0_v4p0__7_6_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -718,18 +718,18 @@ define void @v_shuffle_v3p0_v4p0__7_7_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -772,22 +772,22 @@ define void @v_shuffle_v3p0_v4p0__7_7_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -830,22 +830,22 @@ define void @v_shuffle_v3p0_v4p0__7_7_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -888,22 +888,22 @@ define void @v_shuffle_v3p0_v4p0__7_7_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -946,22 +946,22 @@ define void @v_shuffle_v3p0_v4p0__7_7_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -998,19 +998,19 @@ define void @v_shuffle_v3p0_v4p0__7_7_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -1047,19 +1047,19 @@ define void @v_shuffle_v3p0_v4p0__7_7_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -1096,19 +1096,19 @@ define void @v_shuffle_v3p0_v4p0__7_7_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -1145,19 +1145,19 @@ define void @v_shuffle_v3p0_v4p0__7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -1194,19 +1194,19 @@ define void @v_shuffle_v3p0_v4p0__u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1242,19 +1242,19 @@ define void @v_shuffle_v3p0_v4p0__0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> zeroinitializer store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1290,19 +1290,19 @@ define void @v_shuffle_v3p0_v4p0__1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1338,19 +1338,19 @@ define void @v_shuffle_v3p0_v4p0__2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1390,21 +1390,21 @@ define void @v_shuffle_v3p0_v4p0__3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1440,19 +1440,19 @@ define void @v_shuffle_v3p0_v4p0__4_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1494,22 +1494,22 @@ define void @v_shuffle_v3p0_v4p0__5_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -1552,22 +1552,22 @@ define void @v_shuffle_v3p0_v4p0__6_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__6_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v8, v0 -; GFX940-NEXT: v_mov_b32_e32 v9, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__6_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v8, v0 +; GFX942-NEXT: v_mov_b32_e32 v9, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -1614,24 +1614,24 @@ define void @v_shuffle_v3p0_v4p0__7_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -1674,23 +1674,23 @@ define void @v_shuffle_v3p0_v4p0__7_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -1733,23 +1733,23 @@ define void @v_shuffle_v3p0_v4p0__7_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -1792,22 +1792,22 @@ define void @v_shuffle_v3p0_v4p0__7_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -1850,22 +1850,22 @@ define void @v_shuffle_v3p0_v4p0__7_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -1908,23 +1908,23 @@ define void @v_shuffle_v3p0_v4p0__7_4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -1967,22 +1967,22 @@ define void @v_shuffle_v3p0_v4p0__7_5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -2025,22 +2025,22 @@ define void @v_shuffle_v3p0_v4p0__7_6_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v8 -; GFX940-NEXT: v_mov_b32_e32 v5, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v8 +; GFX942-NEXT: v_mov_b32_e32 v5, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -2073,17 +2073,17 @@ define void @v_shuffle_v3p0_v4p0__u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2115,17 +2115,17 @@ define void @v_shuffle_v3p0_v4p0__0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2161,19 +2161,19 @@ define void @v_shuffle_v3p0_v4p0__1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2209,19 +2209,19 @@ define void @v_shuffle_v3p0_v4p0__2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2257,19 +2257,19 @@ define void @v_shuffle_v3p0_v4p0__3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2301,17 +2301,17 @@ define void @v_shuffle_v3p0_v4p0__4_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2353,22 +2353,22 @@ define void @v_shuffle_v3p0_v4p0__5_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v8, v2 -; GFX940-NEXT: v_mov_b32_e32 v9, v3 -; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v8, v2 +; GFX942-NEXT: v_mov_b32_e32 v9, v3 +; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -2411,22 +2411,22 @@ define void @v_shuffle_v3p0_v4p0__6_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__6_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v10, v2 -; GFX940-NEXT: v_mov_b32_e32 v11, v3 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__6_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v10, v2 +; GFX942-NEXT: v_mov_b32_e32 v11, v3 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -2469,22 +2469,22 @@ define void @v_shuffle_v3p0_v4p0__7_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -2527,22 +2527,22 @@ define void @v_shuffle_v3p0_v4p0__7_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -2589,24 +2589,24 @@ define void @v_shuffle_v3p0_v4p0__7_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -2649,23 +2649,23 @@ define void @v_shuffle_v3p0_v4p0__7_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v14, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v14, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -2708,22 +2708,22 @@ define void @v_shuffle_v3p0_v4p0__7_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -2770,25 +2770,25 @@ define void @v_shuffle_v3p0_v4p0__7_4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -2831,22 +2831,22 @@ define void @v_shuffle_v3p0_v4p0__7_5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v10 -; GFX940-NEXT: v_mov_b32_e32 v5, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v10 +; GFX942-NEXT: v_mov_b32_e32 v5, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -2889,22 +2889,22 @@ define void @v_shuffle_v3p0_v4p0__7_6_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v6, v10 -; GFX940-NEXT: v_mov_b32_e32 v7, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v6, v10 +; GFX942-NEXT: v_mov_b32_e32 v7, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -2937,17 +2937,17 @@ define void @v_shuffle_v3p0_v4p0__u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2983,19 +2983,19 @@ define void @v_shuffle_v3p0_v4p0__0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3027,17 +3027,17 @@ define void @v_shuffle_v3p0_v4p0__1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3073,19 +3073,19 @@ define void @v_shuffle_v3p0_v4p0__2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3121,19 +3121,19 @@ define void @v_shuffle_v3p0_v4p0__3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3165,17 +3165,17 @@ define void @v_shuffle_v3p0_v4p0__4_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3217,22 +3217,22 @@ define void @v_shuffle_v3p0_v4p0__5_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v10, v4 -; GFX940-NEXT: v_mov_b32_e32 v11, v5 -; GFX940-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v10, v4 +; GFX942-NEXT: v_mov_b32_e32 v11, v5 +; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -3275,22 +3275,22 @@ define void @v_shuffle_v3p0_v4p0__6_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__6_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v12, v4 -; GFX940-NEXT: v_mov_b32_e32 v13, v5 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__6_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v12, v4 +; GFX942-NEXT: v_mov_b32_e32 v13, v5 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -3333,22 +3333,22 @@ define void @v_shuffle_v3p0_v4p0__7_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -3391,22 +3391,22 @@ define void @v_shuffle_v3p0_v4p0__7_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v12 -; GFX940-NEXT: v_mov_b32_e32 v1, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v12 +; GFX942-NEXT: v_mov_b32_e32 v1, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -3453,24 +3453,24 @@ define void @v_shuffle_v3p0_v4p0__7_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -3513,22 +3513,22 @@ define void @v_shuffle_v3p0_v4p0__7_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v12 -; GFX940-NEXT: v_mov_b32_e32 v1, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v12 +; GFX942-NEXT: v_mov_b32_e32 v1, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -3571,22 +3571,22 @@ define void @v_shuffle_v3p0_v4p0__7_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -3633,24 +3633,24 @@ define void @v_shuffle_v3p0_v4p0__7_4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v12 -; GFX940-NEXT: v_mov_b32_e32 v1, v13 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v12 +; GFX942-NEXT: v_mov_b32_e32 v1, v13 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -3693,22 +3693,22 @@ define void @v_shuffle_v3p0_v4p0__7_5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v6, v12 -; GFX940-NEXT: v_mov_b32_e32 v7, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v6, v12 +; GFX942-NEXT: v_mov_b32_e32 v7, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -3751,22 +3751,22 @@ define void @v_shuffle_v3p0_v4p0__7_6_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v8, v12 -; GFX940-NEXT: v_mov_b32_e32 v9, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v8, v12 +; GFX942-NEXT: v_mov_b32_e32 v9, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -3799,17 +3799,17 @@ define void @v_shuffle_v3p0_v4p0__u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3845,19 +3845,19 @@ define void @v_shuffle_v3p0_v4p0__0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3893,19 +3893,19 @@ define void @v_shuffle_v3p0_v4p0__1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3937,17 +3937,17 @@ define void @v_shuffle_v3p0_v4p0__2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3983,19 +3983,19 @@ define void @v_shuffle_v3p0_v4p0__3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -4027,17 +4027,17 @@ define void @v_shuffle_v3p0_v4p0__4_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -4079,22 +4079,22 @@ define void @v_shuffle_v3p0_v4p0__5_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v12, v6 -; GFX940-NEXT: v_mov_b32_e32 v13, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v12, v6 +; GFX942-NEXT: v_mov_b32_e32 v13, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -4137,22 +4137,22 @@ define void @v_shuffle_v3p0_v4p0__6_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__6_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v14, v6 -; GFX940-NEXT: v_mov_b32_e32 v15, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__6_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v14, v6 +; GFX942-NEXT: v_mov_b32_e32 v15, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -4195,22 +4195,22 @@ define void @v_shuffle_v3p0_v4p0__7_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -4253,22 +4253,22 @@ define void @v_shuffle_v3p0_v4p0__7_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v14 -; GFX940-NEXT: v_mov_b32_e32 v1, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v14 +; GFX942-NEXT: v_mov_b32_e32 v1, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -4315,24 +4315,24 @@ define void @v_shuffle_v3p0_v4p0__7_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v14 -; GFX940-NEXT: v_mov_b32_e32 v3, v15 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v14 +; GFX942-NEXT: v_mov_b32_e32 v3, v15 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -4375,22 +4375,22 @@ define void @v_shuffle_v3p0_v4p0__7_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v14 -; GFX940-NEXT: v_mov_b32_e32 v1, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v14 +; GFX942-NEXT: v_mov_b32_e32 v1, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -4433,22 +4433,22 @@ define void @v_shuffle_v3p0_v4p0__7_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v14 -; GFX940-NEXT: v_mov_b32_e32 v3, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v14 +; GFX942-NEXT: v_mov_b32_e32 v3, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -4495,24 +4495,24 @@ define void @v_shuffle_v3p0_v4p0__7_4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v14 -; GFX940-NEXT: v_mov_b32_e32 v1, v15 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v14 +; GFX942-NEXT: v_mov_b32_e32 v1, v15 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -4555,22 +4555,22 @@ define void @v_shuffle_v3p0_v4p0__7_5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v14 -; GFX940-NEXT: v_mov_b32_e32 v9, v15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v14 +; GFX942-NEXT: v_mov_b32_e32 v9, v15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -4613,22 +4613,22 @@ define void @v_shuffle_v3p0_v4p0__7_6_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v14 -; GFX940-NEXT: v_mov_b32_e32 v11, v15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v14 +; GFX942-NEXT: v_mov_b32_e32 v11, v15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -4672,17 +4672,17 @@ define void @v_shuffle_v3p0_v4p0__0_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -4712,16 +4712,16 @@ define void @v_shuffle_v3p0_v4p0__1_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -4751,16 +4751,16 @@ define void @v_shuffle_v3p0_v4p0__2_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -4794,18 +4794,18 @@ define void @v_shuffle_v3p0_v4p0__3_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> store <3 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -4852,19 +4852,19 @@ define void @v_shuffle_v3p0_v4p0__5_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -4901,19 +4901,19 @@ define void @v_shuffle_v3p0_v4p0__6_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__6_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__6_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -4954,21 +4954,21 @@ define void @v_shuffle_v3p0_v4p0__7_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -5005,19 +5005,19 @@ define void @v_shuffle_v3p0_v4p0__7_u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -5064,24 +5064,24 @@ define void @v_shuffle_v3p0_v4p0__7_0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -5124,22 +5124,22 @@ define void @v_shuffle_v3p0_v4p0__7_1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -5182,22 +5182,22 @@ define void @v_shuffle_v3p0_v4p0__7_2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v14, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v14, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -5240,22 +5240,22 @@ define void @v_shuffle_v3p0_v4p0__7_3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[8:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[8:9], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -5292,19 +5292,19 @@ define void @v_shuffle_v3p0_v4p0__7_5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -5341,19 +5341,19 @@ define void @v_shuffle_v3p0_v4p0__7_6_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -5386,17 +5386,17 @@ define void @v_shuffle_v3p0_v4p0__u_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -5439,22 +5439,22 @@ define void @v_shuffle_v3p0_v4p0__0_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -5497,22 +5497,22 @@ define void @v_shuffle_v3p0_v4p0__1_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -5555,22 +5555,22 @@ define void @v_shuffle_v3p0_v4p0__2_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -5613,22 +5613,22 @@ define void @v_shuffle_v3p0_v4p0__3_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[10:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v8, v6 -; GFX940-NEXT: v_mov_b32_e32 v9, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[10:11], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v8, v6 +; GFX942-NEXT: v_mov_b32_e32 v9, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -5661,17 +5661,17 @@ define void @v_shuffle_v3p0_v4p0__4_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -5708,19 +5708,19 @@ define void @v_shuffle_v3p0_v4p0__5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -5757,19 +5757,19 @@ define void @v_shuffle_v3p0_v4p0__6_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__6_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__6_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -5806,19 +5806,19 @@ define void @v_shuffle_v3p0_v4p0__7_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -5855,19 +5855,19 @@ define void @v_shuffle_v3p0_v4p0__7_u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -5914,24 +5914,24 @@ define void @v_shuffle_v3p0_v4p0__7_0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -5974,22 +5974,22 @@ define void @v_shuffle_v3p0_v4p0__7_1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -6032,22 +6032,22 @@ define void @v_shuffle_v3p0_v4p0__7_2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -6090,22 +6090,22 @@ define void @v_shuffle_v3p0_v4p0__7_3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[10:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[10:11], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -6146,21 +6146,21 @@ define void @v_shuffle_v3p0_v4p0__7_4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -6197,19 +6197,19 @@ define void @v_shuffle_v3p0_v4p0__7_6_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -6242,17 +6242,17 @@ define void @v_shuffle_v3p0_v4p0__u_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__u_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__u_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -6295,22 +6295,22 @@ define void @v_shuffle_v3p0_v4p0__0_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__0_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__0_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -6353,22 +6353,22 @@ define void @v_shuffle_v3p0_v4p0__1_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__1_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v8 -; GFX940-NEXT: v_mov_b32_e32 v5, v9 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__1_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v8 +; GFX942-NEXT: v_mov_b32_e32 v5, v9 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -6411,22 +6411,22 @@ define void @v_shuffle_v3p0_v4p0__2_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__2_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v14, v[10:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v6, v10 -; GFX940-NEXT: v_mov_b32_e32 v7, v11 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__2_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v14, v[10:11], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v6, v10 +; GFX942-NEXT: v_mov_b32_e32 v7, v11 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -6469,22 +6469,22 @@ define void @v_shuffle_v3p0_v4p0__3_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__3_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[12:13], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v10, v6 -; GFX940-NEXT: v_mov_b32_e32 v11, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__3_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[12:13], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v10, v6 +; GFX942-NEXT: v_mov_b32_e32 v11, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -6521,19 +6521,19 @@ define void @v_shuffle_v3p0_v4p0__4_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__4_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__4_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -6566,17 +6566,17 @@ define void @v_shuffle_v3p0_v4p0__5_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__5_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__5_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -6613,19 +6613,19 @@ define void @v_shuffle_v3p0_v4p0__6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -6662,19 +6662,19 @@ define void @v_shuffle_v3p0_v4p0__7_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -6711,19 +6711,19 @@ define void @v_shuffle_v3p0_v4p0__7_u_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -6770,24 +6770,24 @@ define void @v_shuffle_v3p0_v4p0__7_0_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -6830,22 +6830,22 @@ define void @v_shuffle_v3p0_v4p0__7_1_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -6888,22 +6888,22 @@ define void @v_shuffle_v3p0_v4p0__7_2_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v14, v[10:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v14, v[10:11], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -6946,22 +6946,22 @@ define void @v_shuffle_v3p0_v4p0__7_3_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[12:13], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[12:13], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -7002,21 +7002,21 @@ define void @v_shuffle_v3p0_v4p0__7_4_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -7053,19 +7053,19 @@ define void @v_shuffle_v3p0_v4p0__7_5_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -7098,17 +7098,17 @@ define void @v_shuffle_v3p0_v4p0__u_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__u_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__u_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -7151,22 +7151,22 @@ define void @v_shuffle_v3p0_v4p0__0_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__0_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__0_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -7209,22 +7209,22 @@ define void @v_shuffle_v3p0_v4p0__1_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__1_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v10 -; GFX940-NEXT: v_mov_b32_e32 v5, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__1_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v10 +; GFX942-NEXT: v_mov_b32_e32 v5, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -7267,22 +7267,22 @@ define void @v_shuffle_v3p0_v4p0__2_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__2_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v14, v[12:13], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v6, v12 -; GFX940-NEXT: v_mov_b32_e32 v7, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__2_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v14, v[12:13], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v6, v12 +; GFX942-NEXT: v_mov_b32_e32 v7, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -7325,22 +7325,22 @@ define void @v_shuffle_v3p0_v4p0__3_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__3_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[14:15], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v12, v6 -; GFX940-NEXT: v_mov_b32_e32 v13, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__3_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[14:15], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v12, v6 +; GFX942-NEXT: v_mov_b32_e32 v13, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -7377,19 +7377,19 @@ define void @v_shuffle_v3p0_v4p0__4_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__4_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__4_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -7426,19 +7426,19 @@ define void @v_shuffle_v3p0_v4p0__5_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__5_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__5_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -7471,17 +7471,17 @@ define void @v_shuffle_v3p0_v4p0__6_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__6_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__6_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -7518,19 +7518,19 @@ define void @v_shuffle_v3p0_v4p0__7_u_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -7577,24 +7577,24 @@ define void @v_shuffle_v3p0_v4p0__7_0_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -7637,22 +7637,22 @@ define void @v_shuffle_v3p0_v4p0__7_1_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -7695,22 +7695,22 @@ define void @v_shuffle_v3p0_v4p0__7_2_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v14, v[12:13], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v14, v[12:13], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -7753,22 +7753,22 @@ define void @v_shuffle_v3p0_v4p0__7_3_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v16, v[14:15], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v16, v[14:15], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -7809,21 +7809,21 @@ define void @v_shuffle_v3p0_v4p0__7_4_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -7860,19 +7860,19 @@ define void @v_shuffle_v3p0_v4p0__7_5_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -7909,19 +7909,19 @@ define void @v_shuffle_v3p0_v4p0__7_6_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p0_v4p0__7_6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -7966,17 +7966,17 @@ define void @s_shuffle_v3p0_v4p0__0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -8010,18 +8010,18 @@ define void @s_shuffle_v3p0_v4p0__1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -8051,18 +8051,18 @@ define void @s_shuffle_v3p0_v4p0__2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -8096,18 +8096,18 @@ define void @s_shuffle_v3p0_v4p0__3_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -8155,18 +8155,18 @@ define void @s_shuffle_v3p0_v4p0__5_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -8197,18 +8197,18 @@ define void @s_shuffle_v3p0_v4p0__6_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__6_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -8243,18 +8243,18 @@ define void @s_shuffle_v3p0_v4p0__7_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -8299,24 +8299,24 @@ define void @s_shuffle_v3p0_v4p0__7_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -8357,21 +8357,21 @@ define void @s_shuffle_v3p0_v4p0__7_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -8416,23 +8416,23 @@ define void @s_shuffle_v3p0_v4p0__7_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -8473,23 +8473,23 @@ define void @s_shuffle_v3p0_v4p0__7_3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -8528,20 +8528,20 @@ define void @s_shuffle_v3p0_v4p0__7_4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -8600,20 +8600,20 @@ define void @s_shuffle_v3p0_v4p0__7_6_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -8648,20 +8648,20 @@ define void @s_shuffle_v3p0_v4p0__7_7_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -8710,23 +8710,23 @@ define void @s_shuffle_v3p0_v4p0__7_7_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -8775,23 +8775,23 @@ define void @s_shuffle_v3p0_v4p0__7_7_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -8832,23 +8832,23 @@ define void @s_shuffle_v3p0_v4p0__7_7_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -8893,25 +8893,25 @@ define void @s_shuffle_v3p0_v4p0__7_7_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -8950,22 +8950,22 @@ define void @s_shuffle_v3p0_v4p0__7_7_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -9004,22 +9004,22 @@ define void @s_shuffle_v3p0_v4p0__7_7_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -9084,22 +9084,22 @@ define void @s_shuffle_v3p0_v4p0__7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -9138,20 +9138,20 @@ define void @s_shuffle_v3p0_v4p0__u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -9214,22 +9214,22 @@ define void @s_shuffle_v3p0_v4p0__1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -9267,22 +9267,22 @@ define void @s_shuffle_v3p0_v4p0__2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -9324,22 +9324,22 @@ define void @s_shuffle_v3p0_v4p0__3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -9377,20 +9377,20 @@ define void @s_shuffle_v3p0_v4p0__4_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -9438,25 +9438,25 @@ define void @s_shuffle_v3p0_v4p0__5_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -9505,23 +9505,23 @@ define void @s_shuffle_v3p0_v4p0__6_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__6_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -9570,25 +9570,25 @@ define void @s_shuffle_v3p0_v4p0__7_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -9633,23 +9633,23 @@ define void @s_shuffle_v3p0_v4p0__7_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -9698,25 +9698,25 @@ define void @s_shuffle_v3p0_v4p0__7_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -9761,25 +9761,25 @@ define void @s_shuffle_v3p0_v4p0__7_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -9824,25 +9824,25 @@ define void @s_shuffle_v3p0_v4p0__7_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -9891,25 +9891,25 @@ define void @s_shuffle_v3p0_v4p0__7_4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -9954,23 +9954,23 @@ define void @s_shuffle_v3p0_v4p0__7_5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -10019,25 +10019,25 @@ define void @s_shuffle_v3p0_v4p0__7_6_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -10202,23 +10202,23 @@ define void @s_shuffle_v3p0_v4p0__5_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -10263,23 +10263,23 @@ define void @s_shuffle_v3p0_v4p0__6_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__6_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -10324,23 +10324,23 @@ define void @s_shuffle_v3p0_v4p0__7_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -10385,23 +10385,23 @@ define void @s_shuffle_v3p0_v4p0__7_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -10450,25 +10450,25 @@ define void @s_shuffle_v3p0_v4p0__7_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -10517,25 +10517,25 @@ define void @s_shuffle_v3p0_v4p0__7_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -10580,25 +10580,25 @@ define void @s_shuffle_v3p0_v4p0__7_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -10647,25 +10647,25 @@ define void @s_shuffle_v3p0_v4p0__7_4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -10710,23 +10710,23 @@ define void @s_shuffle_v3p0_v4p0__7_5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -10775,25 +10775,25 @@ define void @s_shuffle_v3p0_v4p0__7_6_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -10958,23 +10958,23 @@ define void @s_shuffle_v3p0_v4p0__5_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -11015,23 +11015,23 @@ define void @s_shuffle_v3p0_v4p0__6_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__6_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -11076,23 +11076,23 @@ define void @s_shuffle_v3p0_v4p0__7_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -11133,21 +11133,21 @@ define void @s_shuffle_v3p0_v4p0__7_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -11196,25 +11196,25 @@ define void @s_shuffle_v3p0_v4p0__7_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -11255,21 +11255,21 @@ define void @s_shuffle_v3p0_v4p0__7_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -11314,23 +11314,23 @@ define void @s_shuffle_v3p0_v4p0__7_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -11375,23 +11375,23 @@ define void @s_shuffle_v3p0_v4p0__7_4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -11436,23 +11436,23 @@ define void @s_shuffle_v3p0_v4p0__7_5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -11497,23 +11497,23 @@ define void @s_shuffle_v3p0_v4p0__7_6_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -11552,20 +11552,20 @@ define void @s_shuffle_v3p0_v4p0__u_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -11628,22 +11628,22 @@ define void @s_shuffle_v3p0_v4p0__1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -11685,22 +11685,22 @@ define void @s_shuffle_v3p0_v4p0__2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -11742,22 +11742,22 @@ define void @s_shuffle_v3p0_v4p0__3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -11795,20 +11795,20 @@ define void @s_shuffle_v3p0_v4p0__4_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -11856,25 +11856,25 @@ define void @s_shuffle_v3p0_v4p0__5_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -11919,25 +11919,25 @@ define void @s_shuffle_v3p0_v4p0__6_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__6_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -11986,25 +11986,25 @@ define void @s_shuffle_v3p0_v4p0__7_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -12049,23 +12049,23 @@ define void @s_shuffle_v3p0_v4p0__7_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -12114,25 +12114,25 @@ define void @s_shuffle_v3p0_v4p0__7_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -12177,23 +12177,23 @@ define void @s_shuffle_v3p0_v4p0__7_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -12242,25 +12242,25 @@ define void @s_shuffle_v3p0_v4p0__7_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -12309,25 +12309,25 @@ define void @s_shuffle_v3p0_v4p0__7_4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s18 -; GFX940-NEXT: s_mov_b32 s9, s19 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -12372,23 +12372,23 @@ define void @s_shuffle_v3p0_v4p0__7_5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -12437,25 +12437,25 @@ define void @s_shuffle_v3p0_v4p0__7_6_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -12500,17 +12500,17 @@ define void @s_shuffle_v3p0_v4p0__0_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -12544,18 +12544,18 @@ define void @s_shuffle_v3p0_v4p0__1_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -12585,18 +12585,18 @@ define void @s_shuffle_v3p0_v4p0__2_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -12630,18 +12630,18 @@ define void @s_shuffle_v3p0_v4p0__3_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -12697,22 +12697,22 @@ define void @s_shuffle_v3p0_v4p0__5_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -12751,22 +12751,22 @@ define void @s_shuffle_v3p0_v4p0__6_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__6_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -12809,22 +12809,22 @@ define void @s_shuffle_v3p0_v4p0__7_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -12863,20 +12863,20 @@ define void @s_shuffle_v3p0_v4p0__7_u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -12921,26 +12921,26 @@ define void @s_shuffle_v3p0_v4p0__7_0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -12981,23 +12981,23 @@ define void @s_shuffle_v3p0_v4p0__7_1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -13046,23 +13046,23 @@ define void @s_shuffle_v3p0_v4p0__7_2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s18 -; GFX940-NEXT: s_mov_b32 s9, s19 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -13103,23 +13103,23 @@ define void @s_shuffle_v3p0_v4p0__7_3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s18 -; GFX940-NEXT: s_mov_b32 s9, s19 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -13162,22 +13162,22 @@ define void @s_shuffle_v3p0_v4p0__7_5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -13216,22 +13216,22 @@ define void @s_shuffle_v3p0_v4p0__7_6_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -13296,23 +13296,23 @@ define void @s_shuffle_v3p0_v4p0__0_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -13357,23 +13357,23 @@ define void @s_shuffle_v3p0_v4p0__1_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -13418,23 +13418,23 @@ define void @s_shuffle_v3p0_v4p0__2_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -13479,23 +13479,23 @@ define void @s_shuffle_v3p0_v4p0__3_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -13620,20 +13620,20 @@ define void @s_shuffle_v3p0_v4p0__7_u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -13682,26 +13682,26 @@ define void @s_shuffle_v3p0_v4p0__7_0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -13746,23 +13746,23 @@ define void @s_shuffle_v3p0_v4p0__7_1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -13811,25 +13811,25 @@ define void @s_shuffle_v3p0_v4p0__7_2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s18 -; GFX940-NEXT: s_mov_b32 s9, s19 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -13874,25 +13874,25 @@ define void @s_shuffle_v3p0_v4p0__7_3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s18 -; GFX940-NEXT: s_mov_b32 s9, s19 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -13935,22 +13935,22 @@ define void @s_shuffle_v3p0_v4p0__7_4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -13993,22 +13993,22 @@ define void @s_shuffle_v3p0_v4p0__7_6_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -14073,23 +14073,23 @@ define void @s_shuffle_v3p0_v4p0__0_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__0_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__0_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -14134,23 +14134,23 @@ define void @s_shuffle_v3p0_v4p0__1_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__1_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -14195,23 +14195,23 @@ define void @s_shuffle_v3p0_v4p0__2_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__2_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -14256,23 +14256,23 @@ define void @s_shuffle_v3p0_v4p0__3_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__3_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -14423,23 +14423,23 @@ define void @s_shuffle_v3p0_v4p0__7_0_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -14484,23 +14484,23 @@ define void @s_shuffle_v3p0_v4p0__7_1_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -14545,23 +14545,23 @@ define void @s_shuffle_v3p0_v4p0__7_2_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -14606,23 +14606,23 @@ define void @s_shuffle_v3p0_v4p0__7_3_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -14665,22 +14665,22 @@ define void @s_shuffle_v3p0_v4p0__7_4_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -14739,20 +14739,20 @@ define void @s_shuffle_v3p0_v4p0__u_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__u_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__u_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -14797,23 +14797,23 @@ define void @s_shuffle_v3p0_v4p0__0_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__0_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__0_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -14862,25 +14862,25 @@ define void @s_shuffle_v3p0_v4p0__1_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__1_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -14925,25 +14925,25 @@ define void @s_shuffle_v3p0_v4p0__2_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__2_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -14992,25 +14992,25 @@ define void @s_shuffle_v3p0_v4p0__3_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__3_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -15075,22 +15075,22 @@ define void @s_shuffle_v3p0_v4p0__5_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__5_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -15133,22 +15133,22 @@ define void @s_shuffle_v3p0_v4p0__6_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__6_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -15187,20 +15187,20 @@ define void @s_shuffle_v3p0_v4p0__7_u_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -15249,25 +15249,25 @@ define void @s_shuffle_v3p0_v4p0__7_0_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -15312,23 +15312,23 @@ define void @s_shuffle_v3p0_v4p0__7_1_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -15377,25 +15377,25 @@ define void @s_shuffle_v3p0_v4p0__7_2_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -15440,25 +15440,25 @@ define void @s_shuffle_v3p0_v4p0__7_3_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -15501,22 +15501,22 @@ define void @s_shuffle_v3p0_v4p0__7_4_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -15581,22 +15581,22 @@ define void @s_shuffle_v3p0_v4p0__7_6_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p0_v4p0__7_6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v2p3.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v2p3.ll index ae748cba52c68..bd0100a4ffdb5 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v2p3.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v2p3.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v3p3_v2p3__u_u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v3p3_v2p3__0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -79,17 +79,17 @@ define void @v_shuffle_v3p3_v2p3__1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -132,17 +132,17 @@ define void @v_shuffle_v3p3_v2p3__3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -182,21 +182,21 @@ define void @v_shuffle_v3p3_v2p3__3_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__3_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__3_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -235,21 +235,21 @@ define void @v_shuffle_v3p3_v2p3__3_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__3_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__3_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -283,19 +283,19 @@ define void @v_shuffle_v3p3_v2p3__3_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__3_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__3_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -328,17 +328,17 @@ define void @v_shuffle_v3p3_v2p3__3_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__3_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__3_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -377,20 +377,20 @@ define void @v_shuffle_v3p3_v2p3__3_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__3_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__3_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -430,21 +430,21 @@ define void @v_shuffle_v3p3_v2p3__3_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__3_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__3_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -479,19 +479,19 @@ define void @v_shuffle_v3p3_v2p3__3_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__3_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__3_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -526,18 +526,18 @@ define void @v_shuffle_v3p3_v2p3__3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -571,18 +571,18 @@ define void @v_shuffle_v3p3_v2p3__u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -616,18 +616,18 @@ define void @v_shuffle_v3p3_v2p3__0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <3 x i32> zeroinitializer store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -661,19 +661,19 @@ define void @v_shuffle_v3p3_v2p3__1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -706,18 +706,18 @@ define void @v_shuffle_v3p3_v2p3__2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -757,21 +757,21 @@ define void @v_shuffle_v3p3_v2p3__3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -811,20 +811,20 @@ define void @v_shuffle_v3p3_v2p3__3_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__3_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__3_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -865,21 +865,21 @@ define void @v_shuffle_v3p3_v2p3__3_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__3_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__3_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -920,22 +920,22 @@ define void @v_shuffle_v3p3_v2p3__3_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__3_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__3_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -968,18 +968,18 @@ define void @v_shuffle_v3p3_v2p3__u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1011,18 +1011,18 @@ define void @v_shuffle_v3p3_v2p3__0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1056,18 +1056,18 @@ define void @v_shuffle_v3p3_v2p3__1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1099,18 +1099,18 @@ define void @v_shuffle_v3p3_v2p3__2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1150,21 +1150,21 @@ define void @v_shuffle_v3p3_v2p3__3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -1204,21 +1204,21 @@ define void @v_shuffle_v3p3_v2p3__3_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__3_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__3_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -1259,22 +1259,22 @@ define void @v_shuffle_v3p3_v2p3__3_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__3_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__3_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -1316,22 +1316,22 @@ define void @v_shuffle_v3p3_v2p3__3_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__3_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__3_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -1373,16 +1373,16 @@ define void @v_shuffle_v3p3_v2p3__0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1414,17 +1414,17 @@ define void @v_shuffle_v3p3_v2p3__1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx3 v2, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1469,19 +1469,19 @@ define void @v_shuffle_v3p3_v2p3__3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -1515,18 +1515,18 @@ define void @v_shuffle_v3p3_v2p3__3_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__3_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx3 v1, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__3_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx3 v1, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -1567,22 +1567,22 @@ define void @v_shuffle_v3p3_v2p3__3_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__3_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__3_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -1621,21 +1621,21 @@ define void @v_shuffle_v3p3_v2p3__3_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__3_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__3_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -1668,18 +1668,18 @@ define void @v_shuffle_v3p3_v2p3__u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -1719,21 +1719,21 @@ define void @v_shuffle_v3p3_v2p3__0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -1774,21 +1774,21 @@ define void @v_shuffle_v3p3_v2p3__1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -1821,18 +1821,18 @@ define void @v_shuffle_v3p3_v2p3__2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -1866,18 +1866,18 @@ define void @v_shuffle_v3p3_v2p3__3_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__3_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__3_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -1919,22 +1919,22 @@ define void @v_shuffle_v3p3_v2p3__3_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__3_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__3_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -1975,21 +1975,21 @@ define void @v_shuffle_v3p3_v2p3__3_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__3_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__3_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -2024,19 +2024,19 @@ define void @v_shuffle_v3p3_v2p3__3_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v2p3__3_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v2p3__3_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -2081,17 +2081,17 @@ define void @s_shuffle_v3p3_v2p3__0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v2p3__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v2p3__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x ptr addrspace(3)> %shuf) @@ -2123,17 +2123,17 @@ define void @s_shuffle_v3p3_v2p3__1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v2p3__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v2p3__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x ptr addrspace(3)> %shuf) @@ -2179,17 +2179,17 @@ define void @s_shuffle_v3p3_v2p3__3_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v2p3__3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v2p3__3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -2230,21 +2230,21 @@ define void @s_shuffle_v3p3_v2p3__3_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v2p3__3_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v2p3__3_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -2283,20 +2283,20 @@ define void @s_shuffle_v3p3_v2p3__3_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v2p3__3_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v2p3__3_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -2331,18 +2331,18 @@ define void @s_shuffle_v3p3_v2p3__3_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v2p3__3_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v2p3__3_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -2402,21 +2402,21 @@ define void @s_shuffle_v3p3_v2p3__3_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v2p3__3_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s9 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v2p3__3_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s9 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -2457,21 +2457,21 @@ define void @s_shuffle_v3p3_v2p3__3_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v2p3__3_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s9 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v2p3__3_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s9 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -2508,19 +2508,19 @@ define void @s_shuffle_v3p3_v2p3__3_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v2p3__3_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v2p3__3_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -2575,18 +2575,18 @@ define void @s_shuffle_v3p3_v2p3__u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v2p3__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v2p3__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x ptr addrspace(3)> %shuf) @@ -2641,19 +2641,19 @@ define void @s_shuffle_v3p3_v2p3__1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v2p3__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v2p3__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x ptr addrspace(3)> %shuf) @@ -2687,18 +2687,18 @@ define void @s_shuffle_v3p3_v2p3__2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v2p3__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v2p3__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x ptr addrspace(3)> %shuf) @@ -2740,22 +2740,22 @@ define void @s_shuffle_v3p3_v2p3__3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v2p3__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v2p3__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -2796,21 +2796,21 @@ define void @s_shuffle_v3p3_v2p3__3_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v2p3__3_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v2p3__3_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -2853,22 +2853,22 @@ define void @s_shuffle_v3p3_v2p3__3_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v2p3__3_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v2p3__3_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -2911,22 +2911,22 @@ define void @s_shuffle_v3p3_v2p3__3_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v2p3__3_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v2p3__3_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3040,21 +3040,21 @@ define void @s_shuffle_v3p3_v2p3__3_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v2p3__3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v2p3__3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3095,21 +3095,21 @@ define void @s_shuffle_v3p3_v2p3__3_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v2p3__3_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v2p3__3_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3152,22 +3152,22 @@ define void @s_shuffle_v3p3_v2p3__3_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v2p3__3_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v2p3__3_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3210,22 +3210,22 @@ define void @s_shuffle_v3p3_v2p3__3_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v2p3__3_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v2p3__3_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3270,17 +3270,17 @@ define void @s_shuffle_v3p3_v2p3__0_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v2p3__0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v2p3__0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x ptr addrspace(3)> %shuf) @@ -3312,17 +3312,17 @@ define void @s_shuffle_v3p3_v2p3__1_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v2p3__1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v2p3__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x ptr addrspace(3)> %shuf) @@ -3372,19 +3372,19 @@ define void @s_shuffle_v3p3_v2p3__3_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v2p3__3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v2p3__3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3419,18 +3419,18 @@ define void @s_shuffle_v3p3_v2p3__3_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v2p3__3_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v2p3__3_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3473,22 +3473,22 @@ define void @s_shuffle_v3p3_v2p3__3_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v2p3__3_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v2p3__3_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3529,21 +3529,21 @@ define void @s_shuffle_v3p3_v2p3__3_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v2p3__3_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v2p3__3_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3603,21 +3603,21 @@ define void @s_shuffle_v3p3_v2p3__0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v2p3__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v2p3__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3658,21 +3658,21 @@ define void @s_shuffle_v3p3_v2p3__1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v2p3__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v2p3__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3726,18 +3726,18 @@ define void @s_shuffle_v3p3_v2p3__3_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v2p3__3_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v2p3__3_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3780,22 +3780,22 @@ define void @s_shuffle_v3p3_v2p3__3_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v2p3__3_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v2p3__3_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3836,21 +3836,21 @@ define void @s_shuffle_v3p3_v2p3__3_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v2p3__3_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v2p3__3_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3887,19 +3887,19 @@ define void @s_shuffle_v3p3_v2p3__3_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v2p3__3_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v2p3__3_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <3 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v3p3.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v3p3.ll index 2a8d6a89788f4..cecd2a0e4b015 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v3p3.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v3p3.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v3p3_v3p3__u_u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v3p3_v3p3__0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -79,17 +79,17 @@ define void @v_shuffle_v3p3_v3p3__1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -121,17 +121,17 @@ define void @v_shuffle_v3p3_v3p3__2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -174,17 +174,17 @@ define void @v_shuffle_v3p3_v3p3__4_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -217,17 +217,17 @@ define void @v_shuffle_v3p3_v3p3__5_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -268,22 +268,22 @@ define void @v_shuffle_v3p3_v3p3__5_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -322,21 +322,21 @@ define void @v_shuffle_v3p3_v3p3__5_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -377,21 +377,21 @@ define void @v_shuffle_v3p3_v3p3__5_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -425,18 +425,18 @@ define void @v_shuffle_v3p3_v3p3__5_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -469,17 +469,17 @@ define void @v_shuffle_v3p3_v3p3__5_4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -514,18 +514,18 @@ define void @v_shuffle_v3p3_v3p3__5_5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -568,23 +568,23 @@ define void @v_shuffle_v3p3_v3p3__5_5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -627,23 +627,23 @@ define void @v_shuffle_v3p3_v3p3__5_5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -684,22 +684,22 @@ define void @v_shuffle_v3p3_v3p3__5_5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -735,20 +735,20 @@ define void @v_shuffle_v3p3_v3p3__5_5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -785,20 +785,20 @@ define void @v_shuffle_v3p3_v3p3__5_5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: v_mov_b32_e32 v6, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: v_mov_b32_e32 v6, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -833,18 +833,18 @@ define void @v_shuffle_v3p3_v3p3__5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -879,18 +879,18 @@ define void @v_shuffle_v3p3_v3p3__u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -924,18 +924,18 @@ define void @v_shuffle_v3p3_v3p3__0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <3 x i32> zeroinitializer store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -970,19 +970,19 @@ define void @v_shuffle_v3p3_v3p3__1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1017,19 +1017,19 @@ define void @v_shuffle_v3p3_v3p3__2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1063,18 +1063,18 @@ define void @v_shuffle_v3p3_v3p3__3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1116,23 +1116,23 @@ define void @v_shuffle_v3p3_v3p3__4_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -1175,23 +1175,23 @@ define void @v_shuffle_v3p3_v3p3__5_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -1232,22 +1232,22 @@ define void @v_shuffle_v3p3_v3p3__5_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -1290,23 +1290,23 @@ define void @v_shuffle_v3p3_v3p3__5_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -1348,22 +1348,22 @@ define void @v_shuffle_v3p3_v3p3__5_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -1405,21 +1405,21 @@ define void @v_shuffle_v3p3_v3p3__5_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -1460,22 +1460,22 @@ define void @v_shuffle_v3p3_v3p3__5_4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -1508,17 +1508,17 @@ define void @v_shuffle_v3p3_v3p3__u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1550,17 +1550,17 @@ define void @v_shuffle_v3p3_v3p3__0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1594,18 +1594,18 @@ define void @v_shuffle_v3p3_v3p3__1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1639,18 +1639,18 @@ define void @v_shuffle_v3p3_v3p3__2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1682,17 +1682,17 @@ define void @v_shuffle_v3p3_v3p3__3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1732,22 +1732,22 @@ define void @v_shuffle_v3p3_v3p3__4_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -1788,22 +1788,22 @@ define void @v_shuffle_v3p3_v3p3__5_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -1844,22 +1844,22 @@ define void @v_shuffle_v3p3_v3p3__5_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -1902,23 +1902,23 @@ define void @v_shuffle_v3p3_v3p3__5_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -1961,22 +1961,22 @@ define void @v_shuffle_v3p3_v3p3__5_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: v_mov_b32_e32 v6, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: v_mov_b32_e32 v6, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -2018,21 +2018,21 @@ define void @v_shuffle_v3p3_v3p3__5_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -2073,22 +2073,22 @@ define void @v_shuffle_v3p3_v3p3__5_4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -2121,17 +2121,17 @@ define void @v_shuffle_v3p3_v3p3__u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2163,17 +2163,17 @@ define void @v_shuffle_v3p3_v3p3__0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2207,18 +2207,18 @@ define void @v_shuffle_v3p3_v3p3__1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2252,18 +2252,18 @@ define void @v_shuffle_v3p3_v3p3__2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2295,17 +2295,17 @@ define void @v_shuffle_v3p3_v3p3__3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2345,21 +2345,21 @@ define void @v_shuffle_v3p3_v3p3__4_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -2400,21 +2400,21 @@ define void @v_shuffle_v3p3_v3p3__5_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -2453,21 +2453,21 @@ define void @v_shuffle_v3p3_v3p3__5_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -2510,22 +2510,22 @@ define void @v_shuffle_v3p3_v3p3__5_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -2564,21 +2564,21 @@ define void @v_shuffle_v3p3_v3p3__5_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -2619,22 +2619,22 @@ define void @v_shuffle_v3p3_v3p3__5_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -2675,21 +2675,21 @@ define void @v_shuffle_v3p3_v3p3__5_4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -2731,16 +2731,16 @@ define void @v_shuffle_v3p3_v3p3__0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2772,17 +2772,17 @@ define void @v_shuffle_v3p3_v3p3__1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2814,17 +2814,17 @@ define void @v_shuffle_v3p3_v3p3__2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2870,19 +2870,19 @@ define void @v_shuffle_v3p3_v3p3__4_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -2918,19 +2918,19 @@ define void @v_shuffle_v3p3_v3p3__5_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -2964,18 +2964,18 @@ define void @v_shuffle_v3p3_v3p3__5_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v3, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v3, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3017,21 +3017,21 @@ define void @v_shuffle_v3p3_v3p3__5_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3070,21 +3070,21 @@ define void @v_shuffle_v3p3_v3p3__5_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3127,22 +3127,22 @@ define void @v_shuffle_v3p3_v3p3__5_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3177,19 +3177,19 @@ define void @v_shuffle_v3p3_v3p3__5_4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3222,17 +3222,17 @@ define void @v_shuffle_v3p3_v3p3__u_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3272,22 +3272,22 @@ define void @v_shuffle_v3p3_v3p3__0_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3328,22 +3328,22 @@ define void @v_shuffle_v3p3_v3p3__1_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3384,21 +3384,21 @@ define void @v_shuffle_v3p3_v3p3__2_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3431,17 +3431,17 @@ define void @v_shuffle_v3p3_v3p3__3_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3476,18 +3476,18 @@ define void @v_shuffle_v3p3_v3p3__4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3522,18 +3522,18 @@ define void @v_shuffle_v3p3_v3p3__5_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3568,18 +3568,18 @@ define void @v_shuffle_v3p3_v3p3__5_u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3620,21 +3620,21 @@ define void @v_shuffle_v3p3_v3p3__5_0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3675,22 +3675,22 @@ define void @v_shuffle_v3p3_v3p3__5_1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3733,22 +3733,22 @@ define void @v_shuffle_v3p3_v3p3__5_2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3783,19 +3783,19 @@ define void @v_shuffle_v3p3_v3p3__5_3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3828,17 +3828,17 @@ define void @v_shuffle_v3p3_v3p3__u_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3879,22 +3879,22 @@ define void @v_shuffle_v3p3_v3p3__0_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3935,22 +3935,22 @@ define void @v_shuffle_v3p3_v3p3__1_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3991,21 +3991,21 @@ define void @v_shuffle_v3p3_v3p3__2_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, v6 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, v6 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -4038,17 +4038,17 @@ define void @v_shuffle_v3p3_v3p3__3_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -4083,18 +4083,18 @@ define void @v_shuffle_v3p3_v3p3__4_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -4127,17 +4127,17 @@ define void @v_shuffle_v3p3_v3p3__5_u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -4178,22 +4178,22 @@ define void @v_shuffle_v3p3_v3p3__5_0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -4234,22 +4234,22 @@ define void @v_shuffle_v3p3_v3p3__5_1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -4290,21 +4290,21 @@ define void @v_shuffle_v3p3_v3p3__5_2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -4341,20 +4341,20 @@ define void @v_shuffle_v3p3_v3p3__5_3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -4387,17 +4387,17 @@ define void @v_shuffle_v3p3_v3p3__5_4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v3p3__5_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -4442,17 +4442,17 @@ define void @s_shuffle_v3p3_v3p3__0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x ptr addrspace(3)> %shuf) @@ -4484,17 +4484,17 @@ define void @s_shuffle_v3p3_v3p3__1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x ptr addrspace(3)> %shuf) @@ -4526,17 +4526,17 @@ define void @s_shuffle_v3p3_v3p3__2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x ptr addrspace(3)> %shuf) @@ -4582,17 +4582,17 @@ define void @s_shuffle_v3p3_v3p3__4_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -4625,17 +4625,17 @@ define void @s_shuffle_v3p3_v3p3__5_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -4676,21 +4676,21 @@ define void @s_shuffle_v3p3_v3p3__5_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -4729,20 +4729,20 @@ define void @s_shuffle_v3p3_v3p3__5_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -4783,21 +4783,21 @@ define void @s_shuffle_v3p3_v3p3__5_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -4832,18 +4832,18 @@ define void @s_shuffle_v3p3_v3p3__5_3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -4897,18 +4897,18 @@ define void @s_shuffle_v3p3_v3p3__5_5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -4951,22 +4951,22 @@ define void @s_shuffle_v3p3_v3p3__5_5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -5009,22 +5009,22 @@ define void @s_shuffle_v3p3_v3p3__5_5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -5065,21 +5065,21 @@ define void @s_shuffle_v3p3_v3p3__5_5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -5116,19 +5116,19 @@ define void @s_shuffle_v3p3_v3p3__5_5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -5165,19 +5165,19 @@ define void @s_shuffle_v3p3_v3p3__5_5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -5232,18 +5232,18 @@ define void @s_shuffle_v3p3_v3p3__u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x ptr addrspace(3)> %shuf) @@ -5298,19 +5298,19 @@ define void @s_shuffle_v3p3_v3p3__1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x ptr addrspace(3)> %shuf) @@ -5346,19 +5346,19 @@ define void @s_shuffle_v3p3_v3p3__2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x ptr addrspace(3)> %shuf) @@ -5392,18 +5392,18 @@ define void @s_shuffle_v3p3_v3p3__3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x ptr addrspace(3)> %shuf) @@ -5445,22 +5445,22 @@ define void @s_shuffle_v3p3_v3p3__4_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -5503,22 +5503,22 @@ define void @s_shuffle_v3p3_v3p3__5_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -5559,21 +5559,21 @@ define void @s_shuffle_v3p3_v3p3__5_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -5616,22 +5616,22 @@ define void @s_shuffle_v3p3_v3p3__5_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -5674,22 +5674,22 @@ define void @s_shuffle_v3p3_v3p3__5_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -5732,22 +5732,22 @@ define void @s_shuffle_v3p3_v3p3__5_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -5788,21 +5788,21 @@ define void @s_shuffle_v3p3_v3p3__5_4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -5935,21 +5935,21 @@ define void @s_shuffle_v3p3_v3p3__4_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -5990,21 +5990,21 @@ define void @s_shuffle_v3p3_v3p3__5_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -6045,21 +6045,21 @@ define void @s_shuffle_v3p3_v3p3__5_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -6102,22 +6102,22 @@ define void @s_shuffle_v3p3_v3p3__5_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -6160,22 +6160,22 @@ define void @s_shuffle_v3p3_v3p3__5_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -6218,22 +6218,22 @@ define void @s_shuffle_v3p3_v3p3__5_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -6274,21 +6274,21 @@ define void @s_shuffle_v3p3_v3p3__5_4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -6421,21 +6421,21 @@ define void @s_shuffle_v3p3_v3p3__4_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -6476,21 +6476,21 @@ define void @s_shuffle_v3p3_v3p3__5_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -6529,20 +6529,20 @@ define void @s_shuffle_v3p3_v3p3__5_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -6585,22 +6585,22 @@ define void @s_shuffle_v3p3_v3p3__5_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -6639,20 +6639,20 @@ define void @s_shuffle_v3p3_v3p3__5_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -6693,21 +6693,21 @@ define void @s_shuffle_v3p3_v3p3__5_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -6748,21 +6748,21 @@ define void @s_shuffle_v3p3_v3p3__5_4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -6807,17 +6807,17 @@ define void @s_shuffle_v3p3_v3p3__0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x ptr addrspace(3)> %shuf) @@ -6849,17 +6849,17 @@ define void @s_shuffle_v3p3_v3p3__1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x ptr addrspace(3)> %shuf) @@ -6891,17 +6891,17 @@ define void @s_shuffle_v3p3_v3p3__2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x ptr addrspace(3)> %shuf) @@ -6951,19 +6951,19 @@ define void @s_shuffle_v3p3_v3p3__4_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -7000,19 +7000,19 @@ define void @s_shuffle_v3p3_v3p3__5_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -7047,18 +7047,18 @@ define void @s_shuffle_v3p3_v3p3__5_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -7101,22 +7101,22 @@ define void @s_shuffle_v3p3_v3p3__5_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -7157,21 +7157,21 @@ define void @s_shuffle_v3p3_v3p3__5_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -7214,22 +7214,22 @@ define void @s_shuffle_v3p3_v3p3__5_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -7266,19 +7266,19 @@ define void @s_shuffle_v3p3_v3p3__5_4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -7338,21 +7338,21 @@ define void @s_shuffle_v3p3_v3p3__0_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -7393,21 +7393,21 @@ define void @s_shuffle_v3p3_v3p3__1_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -7448,21 +7448,21 @@ define void @s_shuffle_v3p3_v3p3__2_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -7556,18 +7556,18 @@ define void @s_shuffle_v3p3_v3p3__5_u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -7610,22 +7610,22 @@ define void @s_shuffle_v3p3_v3p3__5_0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -7666,21 +7666,21 @@ define void @s_shuffle_v3p3_v3p3__5_1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -7723,22 +7723,22 @@ define void @s_shuffle_v3p3_v3p3__5_2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -7775,19 +7775,19 @@ define void @s_shuffle_v3p3_v3p3__5_3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -7847,21 +7847,21 @@ define void @s_shuffle_v3p3_v3p3__0_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -7902,21 +7902,21 @@ define void @s_shuffle_v3p3_v3p3__1_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -7957,21 +7957,21 @@ define void @s_shuffle_v3p3_v3p3__2_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -8070,21 +8070,21 @@ define void @s_shuffle_v3p3_v3p3__5_0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -8125,21 +8125,21 @@ define void @s_shuffle_v3p3_v3p3__5_1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -8180,21 +8180,21 @@ define void @s_shuffle_v3p3_v3p3__5_2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> @@ -8231,19 +8231,19 @@ define void @s_shuffle_v3p3_v3p3__5_3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v3p3__5_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v3p3__5_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <3 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v4p3.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v4p3.ll index f7cbb42bef31c..834f03f013ba1 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v4p3.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v4p3.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v3p3_v4p3__u_u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v3p3_v4p3__0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -78,17 +78,17 @@ define void @v_shuffle_v3p3_v4p3__1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -120,17 +120,17 @@ define void @v_shuffle_v3p3_v4p3__2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -162,17 +162,17 @@ define void @v_shuffle_v3p3_v4p3__3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -214,17 +214,17 @@ define void @v_shuffle_v3p3_v4p3__5_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -257,17 +257,17 @@ define void @v_shuffle_v3p3_v4p3__6_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__6_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__6_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -300,17 +300,17 @@ define void @v_shuffle_v3p3_v4p3__7_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -351,22 +351,22 @@ define void @v_shuffle_v3p3_v4p3__7_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -405,21 +405,21 @@ define void @v_shuffle_v3p3_v4p3__7_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -459,21 +459,21 @@ define void @v_shuffle_v3p3_v4p3__7_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -514,21 +514,21 @@ define void @v_shuffle_v3p3_v4p3__7_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -563,18 +563,18 @@ define void @v_shuffle_v3p3_v4p3__7_4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -607,17 +607,17 @@ define void @v_shuffle_v3p3_v4p3__7_5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -651,18 +651,18 @@ define void @v_shuffle_v3p3_v4p3__7_6_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -697,18 +697,18 @@ define void @v_shuffle_v3p3_v4p3__7_7_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -751,23 +751,23 @@ define void @v_shuffle_v3p3_v4p3__7_7_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -810,23 +810,23 @@ define void @v_shuffle_v3p3_v4p3__7_7_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -867,22 +867,22 @@ define void @v_shuffle_v3p3_v4p3__7_7_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -924,22 +924,22 @@ define void @v_shuffle_v3p3_v4p3__7_7_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -975,18 +975,18 @@ define void @v_shuffle_v3p3_v4p3__7_7_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -1021,18 +1021,18 @@ define void @v_shuffle_v3p3_v4p3__7_7_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -1067,18 +1067,18 @@ define void @v_shuffle_v3p3_v4p3__7_7_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -1114,19 +1114,19 @@ define void @v_shuffle_v3p3_v4p3__7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -1161,18 +1161,18 @@ define void @v_shuffle_v3p3_v4p3__u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1206,18 +1206,18 @@ define void @v_shuffle_v3p3_v4p3__0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> zeroinitializer store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1252,19 +1252,19 @@ define void @v_shuffle_v3p3_v4p3__1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1299,18 +1299,18 @@ define void @v_shuffle_v3p3_v4p3__2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1346,19 +1346,19 @@ define void @v_shuffle_v3p3_v4p3__3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1392,18 +1392,18 @@ define void @v_shuffle_v3p3_v4p3__4_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1444,23 +1444,23 @@ define void @v_shuffle_v3p3_v4p3__5_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -1503,23 +1503,23 @@ define void @v_shuffle_v3p3_v4p3__6_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__6_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__6_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -1562,23 +1562,23 @@ define void @v_shuffle_v3p3_v4p3__7_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -1619,22 +1619,22 @@ define void @v_shuffle_v3p3_v4p3__7_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -1677,23 +1677,23 @@ define void @v_shuffle_v3p3_v4p3__7_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -1735,22 +1735,22 @@ define void @v_shuffle_v3p3_v4p3__7_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -1792,21 +1792,21 @@ define void @v_shuffle_v3p3_v4p3__7_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -1849,22 +1849,22 @@ define void @v_shuffle_v3p3_v4p3__7_4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -1905,22 +1905,22 @@ define void @v_shuffle_v3p3_v4p3__7_5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -1962,23 +1962,23 @@ define void @v_shuffle_v3p3_v4p3__7_6_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -2011,17 +2011,17 @@ define void @v_shuffle_v3p3_v4p3__u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2053,17 +2053,17 @@ define void @v_shuffle_v3p3_v4p3__0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2097,18 +2097,18 @@ define void @v_shuffle_v3p3_v4p3__1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2142,18 +2142,18 @@ define void @v_shuffle_v3p3_v4p3__2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2187,18 +2187,18 @@ define void @v_shuffle_v3p3_v4p3__3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2230,17 +2230,17 @@ define void @v_shuffle_v3p3_v4p3__4_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2280,22 +2280,22 @@ define void @v_shuffle_v3p3_v4p3__5_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -2336,22 +2336,22 @@ define void @v_shuffle_v3p3_v4p3__6_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__6_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__6_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -2392,22 +2392,22 @@ define void @v_shuffle_v3p3_v4p3__7_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -2448,22 +2448,22 @@ define void @v_shuffle_v3p3_v4p3__7_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -2506,23 +2506,23 @@ define void @v_shuffle_v3p3_v4p3__7_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -2565,22 +2565,22 @@ define void @v_shuffle_v3p3_v4p3__7_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: v_mov_b32_e32 v6, v1 -; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: v_mov_b32_e32 v6, v1 +; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -2621,21 +2621,21 @@ define void @v_shuffle_v3p3_v4p3__7_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -2678,22 +2678,22 @@ define void @v_shuffle_v3p3_v4p3__7_4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -2734,22 +2734,22 @@ define void @v_shuffle_v3p3_v4p3__7_5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -2791,23 +2791,23 @@ define void @v_shuffle_v3p3_v4p3__7_6_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -2840,17 +2840,17 @@ define void @v_shuffle_v3p3_v4p3__u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2882,17 +2882,17 @@ define void @v_shuffle_v3p3_v4p3__0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2925,18 +2925,18 @@ define void @v_shuffle_v3p3_v4p3__1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2970,18 +2970,18 @@ define void @v_shuffle_v3p3_v4p3__2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3015,18 +3015,18 @@ define void @v_shuffle_v3p3_v4p3__3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3058,17 +3058,17 @@ define void @v_shuffle_v3p3_v4p3__4_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3108,21 +3108,21 @@ define void @v_shuffle_v3p3_v4p3__5_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3163,21 +3163,21 @@ define void @v_shuffle_v3p3_v4p3__6_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__6_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__6_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3218,21 +3218,21 @@ define void @v_shuffle_v3p3_v4p3__7_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3271,21 +3271,21 @@ define void @v_shuffle_v3p3_v4p3__7_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3328,22 +3328,22 @@ define void @v_shuffle_v3p3_v4p3__7_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3382,21 +3382,21 @@ define void @v_shuffle_v3p3_v4p3__7_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3437,21 +3437,21 @@ define void @v_shuffle_v3p3_v4p3__7_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3492,22 +3492,22 @@ define void @v_shuffle_v3p3_v4p3__7_4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3548,21 +3548,21 @@ define void @v_shuffle_v3p3_v4p3__7_5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3603,22 +3603,22 @@ define void @v_shuffle_v3p3_v4p3__7_6_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3652,18 +3652,18 @@ define void @v_shuffle_v3p3_v4p3__u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3697,18 +3697,18 @@ define void @v_shuffle_v3p3_v4p3__0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3742,19 +3742,19 @@ define void @v_shuffle_v3p3_v4p3__1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3789,19 +3789,19 @@ define void @v_shuffle_v3p3_v4p3__2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3836,19 +3836,19 @@ define void @v_shuffle_v3p3_v4p3__3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3881,18 +3881,18 @@ define void @v_shuffle_v3p3_v4p3__4_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3933,22 +3933,22 @@ define void @v_shuffle_v3p3_v4p3__5_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -3990,22 +3990,22 @@ define void @v_shuffle_v3p3_v4p3__6_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__6_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__6_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -4047,22 +4047,22 @@ define void @v_shuffle_v3p3_v4p3__7_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -4102,21 +4102,21 @@ define void @v_shuffle_v3p3_v4p3__7_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -4158,22 +4158,22 @@ define void @v_shuffle_v3p3_v4p3__7_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -4214,21 +4214,21 @@ define void @v_shuffle_v3p3_v4p3__7_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -4269,22 +4269,22 @@ define void @v_shuffle_v3p3_v4p3__7_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -4326,22 +4326,22 @@ define void @v_shuffle_v3p3_v4p3__7_4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -4382,21 +4382,21 @@ define void @v_shuffle_v3p3_v4p3__7_5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -4438,22 +4438,22 @@ define void @v_shuffle_v3p3_v4p3__7_6_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -4495,16 +4495,16 @@ define void @v_shuffle_v3p3_v4p3__0_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4535,17 +4535,17 @@ define void @v_shuffle_v3p3_v4p3__1_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4577,17 +4577,17 @@ define void @v_shuffle_v3p3_v4p3__2_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4619,17 +4619,17 @@ define void @v_shuffle_v3p3_v4p3__3_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> store <3 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4675,19 +4675,19 @@ define void @v_shuffle_v3p3_v4p3__5_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -4723,18 +4723,18 @@ define void @v_shuffle_v3p3_v4p3__6_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__6_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__6_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -4771,19 +4771,19 @@ define void @v_shuffle_v3p3_v4p3__7_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -4818,18 +4818,18 @@ define void @v_shuffle_v3p3_v4p3__7_u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -4872,23 +4872,23 @@ define void @v_shuffle_v3p3_v4p3__7_0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -4927,21 +4927,21 @@ define void @v_shuffle_v3p3_v4p3__7_1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -4982,22 +4982,22 @@ define void @v_shuffle_v3p3_v4p3__7_2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -5040,22 +5040,22 @@ define void @v_shuffle_v3p3_v4p3__7_3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -5092,19 +5092,19 @@ define void @v_shuffle_v3p3_v4p3__7_5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -5140,20 +5140,20 @@ define void @v_shuffle_v3p3_v4p3__7_6_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -5186,17 +5186,17 @@ define void @v_shuffle_v3p3_v4p3__u_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -5236,22 +5236,22 @@ define void @v_shuffle_v3p3_v4p3__0_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -5291,22 +5291,22 @@ define void @v_shuffle_v3p3_v4p3__1_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -5347,21 +5347,21 @@ define void @v_shuffle_v3p3_v4p3__2_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -5402,21 +5402,21 @@ define void @v_shuffle_v3p3_v4p3__3_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -5449,17 +5449,17 @@ define void @v_shuffle_v3p3_v4p3__4_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -5494,18 +5494,18 @@ define void @v_shuffle_v3p3_v4p3__5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -5540,18 +5540,18 @@ define void @v_shuffle_v3p3_v4p3__6_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__6_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__6_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -5586,18 +5586,18 @@ define void @v_shuffle_v3p3_v4p3__7_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -5632,18 +5632,18 @@ define void @v_shuffle_v3p3_v4p3__7_u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -5686,23 +5686,23 @@ define void @v_shuffle_v3p3_v4p3__7_0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -5743,22 +5743,22 @@ define void @v_shuffle_v3p3_v4p3__7_1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -5800,22 +5800,22 @@ define void @v_shuffle_v3p3_v4p3__7_2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -5858,22 +5858,22 @@ define void @v_shuffle_v3p3_v4p3__7_3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -5910,19 +5910,19 @@ define void @v_shuffle_v3p3_v4p3__7_4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -5958,20 +5958,20 @@ define void @v_shuffle_v3p3_v4p3__7_6_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: v_mov_b32_e32 v6, v1 -; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: v_mov_b32_e32 v6, v1 +; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -6004,17 +6004,17 @@ define void @v_shuffle_v3p3_v4p3__u_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__u_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__u_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -6055,22 +6055,22 @@ define void @v_shuffle_v3p3_v4p3__0_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__0_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__0_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -6111,22 +6111,22 @@ define void @v_shuffle_v3p3_v4p3__1_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__1_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__1_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -6167,21 +6167,21 @@ define void @v_shuffle_v3p3_v4p3__2_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__2_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, v6 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__2_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, v6 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -6222,21 +6222,21 @@ define void @v_shuffle_v3p3_v4p3__3_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__3_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, v6 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__3_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, v6 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -6269,17 +6269,17 @@ define void @v_shuffle_v3p3_v4p3__4_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__4_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__4_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -6313,18 +6313,18 @@ define void @v_shuffle_v3p3_v4p3__5_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__5_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__5_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -6359,18 +6359,18 @@ define void @v_shuffle_v3p3_v4p3__6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -6405,18 +6405,18 @@ define void @v_shuffle_v3p3_v4p3__7_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -6449,17 +6449,17 @@ define void @v_shuffle_v3p3_v4p3__7_u_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -6500,22 +6500,22 @@ define void @v_shuffle_v3p3_v4p3__7_0_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -6556,22 +6556,22 @@ define void @v_shuffle_v3p3_v4p3__7_1_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -6612,21 +6612,21 @@ define void @v_shuffle_v3p3_v4p3__7_2_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v2 -; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v2 +; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -6667,21 +6667,21 @@ define void @v_shuffle_v3p3_v4p3__7_3_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -6717,20 +6717,20 @@ define void @v_shuffle_v3p3_v4p3__7_4_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -6763,17 +6763,17 @@ define void @v_shuffle_v3p3_v4p3__7_5_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -6807,18 +6807,18 @@ define void @v_shuffle_v3p3_v4p3__u_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__u_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__u_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -6859,22 +6859,22 @@ define void @v_shuffle_v3p3_v4p3__0_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__0_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__0_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -6916,22 +6916,22 @@ define void @v_shuffle_v3p3_v4p3__1_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__1_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__1_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -6973,22 +6973,22 @@ define void @v_shuffle_v3p3_v4p3__2_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__2_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__2_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -7030,22 +7030,22 @@ define void @v_shuffle_v3p3_v4p3__3_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__3_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__3_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -7080,18 +7080,18 @@ define void @v_shuffle_v3p3_v4p3__4_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__4_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__4_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -7126,19 +7126,19 @@ define void @v_shuffle_v3p3_v4p3__5_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__5_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__5_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -7174,19 +7174,19 @@ define void @v_shuffle_v3p3_v4p3__6_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__6_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__6_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -7220,18 +7220,18 @@ define void @v_shuffle_v3p3_v4p3__7_u_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -7273,23 +7273,23 @@ define void @v_shuffle_v3p3_v4p3__7_0_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -7330,22 +7330,22 @@ define void @v_shuffle_v3p3_v4p3__7_1_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -7387,22 +7387,22 @@ define void @v_shuffle_v3p3_v4p3__7_2_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -7444,22 +7444,22 @@ define void @v_shuffle_v3p3_v4p3__7_3_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -7495,20 +7495,20 @@ define void @v_shuffle_v3p3_v4p3__7_4_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -7543,18 +7543,18 @@ define void @v_shuffle_v3p3_v4p3__7_5_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -7589,19 +7589,19 @@ define void @v_shuffle_v3p3_v4p3__7_6_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v3p3_v4p3__7_6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -7646,17 +7646,17 @@ define void @s_shuffle_v3p3_v4p3__0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x ptr addrspace(3)> %shuf) @@ -7688,17 +7688,17 @@ define void @s_shuffle_v3p3_v4p3__1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x ptr addrspace(3)> %shuf) @@ -7730,17 +7730,17 @@ define void @s_shuffle_v3p3_v4p3__2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x ptr addrspace(3)> %shuf) @@ -7772,17 +7772,17 @@ define void @s_shuffle_v3p3_v4p3__3_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x ptr addrspace(3)> %shuf) @@ -7828,17 +7828,17 @@ define void @s_shuffle_v3p3_v4p3__5_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -7871,17 +7871,17 @@ define void @s_shuffle_v3p3_v4p3__6_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__6_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__6_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -7914,17 +7914,17 @@ define void @s_shuffle_v3p3_v4p3__7_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -7965,21 +7965,21 @@ define void @s_shuffle_v3p3_v4p3__7_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -8018,20 +8018,20 @@ define void @s_shuffle_v3p3_v4p3__7_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -8072,21 +8072,21 @@ define void @s_shuffle_v3p3_v4p3__7_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -8127,21 +8127,21 @@ define void @s_shuffle_v3p3_v4p3__7_3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -8176,18 +8176,18 @@ define void @s_shuffle_v3p3_v4p3__7_4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -8241,18 +8241,18 @@ define void @s_shuffle_v3p3_v4p3__7_6_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -8287,18 +8287,18 @@ define void @s_shuffle_v3p3_v4p3__7_7_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -8341,22 +8341,22 @@ define void @s_shuffle_v3p3_v4p3__7_7_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -8399,22 +8399,22 @@ define void @s_shuffle_v3p3_v4p3__7_7_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -8455,21 +8455,21 @@ define void @s_shuffle_v3p3_v4p3__7_7_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -8512,22 +8512,22 @@ define void @s_shuffle_v3p3_v4p3__7_7_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -8564,19 +8564,19 @@ define void @s_shuffle_v3p3_v4p3__7_7_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -8613,19 +8613,19 @@ define void @s_shuffle_v3p3_v4p3__7_7_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -8682,19 +8682,19 @@ define void @s_shuffle_v3p3_v4p3__7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -8729,18 +8729,18 @@ define void @s_shuffle_v3p3_v4p3__u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x ptr addrspace(3)> %shuf) @@ -8795,19 +8795,19 @@ define void @s_shuffle_v3p3_v4p3__1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x ptr addrspace(3)> %shuf) @@ -8843,19 +8843,19 @@ define void @s_shuffle_v3p3_v4p3__2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x ptr addrspace(3)> %shuf) @@ -8891,19 +8891,19 @@ define void @s_shuffle_v3p3_v4p3__3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x ptr addrspace(3)> %shuf) @@ -8937,18 +8937,18 @@ define void @s_shuffle_v3p3_v4p3__4_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x ptr addrspace(3)> %shuf) @@ -8990,22 +8990,22 @@ define void @s_shuffle_v3p3_v4p3__5_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -9048,22 +9048,22 @@ define void @s_shuffle_v3p3_v4p3__6_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__6_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__6_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -9106,22 +9106,22 @@ define void @s_shuffle_v3p3_v4p3__7_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -9162,21 +9162,21 @@ define void @s_shuffle_v3p3_v4p3__7_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -9219,22 +9219,22 @@ define void @s_shuffle_v3p3_v4p3__7_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -9277,22 +9277,22 @@ define void @s_shuffle_v3p3_v4p3__7_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -9335,22 +9335,22 @@ define void @s_shuffle_v3p3_v4p3__7_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -9393,22 +9393,22 @@ define void @s_shuffle_v3p3_v4p3__7_4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -9449,21 +9449,21 @@ define void @s_shuffle_v3p3_v4p3__7_5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -9506,22 +9506,22 @@ define void @s_shuffle_v3p3_v4p3__7_6_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -9673,21 +9673,21 @@ define void @s_shuffle_v3p3_v4p3__5_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -9728,21 +9728,21 @@ define void @s_shuffle_v3p3_v4p3__6_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__6_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__6_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -9783,21 +9783,21 @@ define void @s_shuffle_v3p3_v4p3__7_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -9838,21 +9838,21 @@ define void @s_shuffle_v3p3_v4p3__7_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -9895,22 +9895,22 @@ define void @s_shuffle_v3p3_v4p3__7_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -9953,22 +9953,22 @@ define void @s_shuffle_v3p3_v4p3__7_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -10011,22 +10011,22 @@ define void @s_shuffle_v3p3_v4p3__7_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -10069,22 +10069,22 @@ define void @s_shuffle_v3p3_v4p3__7_4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -10125,21 +10125,21 @@ define void @s_shuffle_v3p3_v4p3__7_5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -10182,22 +10182,22 @@ define void @s_shuffle_v3p3_v4p3__7_6_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -10349,21 +10349,21 @@ define void @s_shuffle_v3p3_v4p3__5_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -10404,21 +10404,21 @@ define void @s_shuffle_v3p3_v4p3__6_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__6_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__6_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -10459,21 +10459,21 @@ define void @s_shuffle_v3p3_v4p3__7_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -10512,20 +10512,20 @@ define void @s_shuffle_v3p3_v4p3__7_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -10568,22 +10568,22 @@ define void @s_shuffle_v3p3_v4p3__7_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -10622,20 +10622,20 @@ define void @s_shuffle_v3p3_v4p3__7_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -10676,21 +10676,21 @@ define void @s_shuffle_v3p3_v4p3__7_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -10731,21 +10731,21 @@ define void @s_shuffle_v3p3_v4p3__7_4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -10786,21 +10786,21 @@ define void @s_shuffle_v3p3_v4p3__7_5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -10841,21 +10841,21 @@ define void @s_shuffle_v3p3_v4p3__7_6_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -10890,18 +10890,18 @@ define void @s_shuffle_v3p3_v4p3__u_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x ptr addrspace(3)> %shuf) @@ -10956,19 +10956,19 @@ define void @s_shuffle_v3p3_v4p3__1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x ptr addrspace(3)> %shuf) @@ -11004,19 +11004,19 @@ define void @s_shuffle_v3p3_v4p3__2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x ptr addrspace(3)> %shuf) @@ -11052,19 +11052,19 @@ define void @s_shuffle_v3p3_v4p3__3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x ptr addrspace(3)> %shuf) @@ -11098,18 +11098,18 @@ define void @s_shuffle_v3p3_v4p3__4_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x ptr addrspace(3)> %shuf) @@ -11151,22 +11151,22 @@ define void @s_shuffle_v3p3_v4p3__5_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -11209,22 +11209,22 @@ define void @s_shuffle_v3p3_v4p3__6_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__6_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__6_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -11267,22 +11267,22 @@ define void @s_shuffle_v3p3_v4p3__7_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -11323,21 +11323,21 @@ define void @s_shuffle_v3p3_v4p3__7_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -11380,22 +11380,22 @@ define void @s_shuffle_v3p3_v4p3__7_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -11436,21 +11436,21 @@ define void @s_shuffle_v3p3_v4p3__7_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -11493,22 +11493,22 @@ define void @s_shuffle_v3p3_v4p3__7_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -11551,22 +11551,22 @@ define void @s_shuffle_v3p3_v4p3__7_4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -11607,21 +11607,21 @@ define void @s_shuffle_v3p3_v4p3__7_5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -11664,22 +11664,22 @@ define void @s_shuffle_v3p3_v4p3__7_6_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -11724,17 +11724,17 @@ define void @s_shuffle_v3p3_v4p3__0_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x ptr addrspace(3)> %shuf) @@ -11766,17 +11766,17 @@ define void @s_shuffle_v3p3_v4p3__1_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x ptr addrspace(3)> %shuf) @@ -11808,17 +11808,17 @@ define void @s_shuffle_v3p3_v4p3__2_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x ptr addrspace(3)> %shuf) @@ -11850,17 +11850,17 @@ define void @s_shuffle_v3p3_v4p3__3_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x ptr addrspace(3)> %shuf) @@ -11910,19 +11910,19 @@ define void @s_shuffle_v3p3_v4p3__5_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -11959,19 +11959,19 @@ define void @s_shuffle_v3p3_v4p3__6_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__6_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__6_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -12008,19 +12008,19 @@ define void @s_shuffle_v3p3_v4p3__7_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -12055,18 +12055,18 @@ define void @s_shuffle_v3p3_v4p3__7_u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -12109,22 +12109,22 @@ define void @s_shuffle_v3p3_v4p3__7_0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -12165,21 +12165,21 @@ define void @s_shuffle_v3p3_v4p3__7_1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -12222,22 +12222,22 @@ define void @s_shuffle_v3p3_v4p3__7_2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -12280,22 +12280,22 @@ define void @s_shuffle_v3p3_v4p3__7_3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -12332,19 +12332,19 @@ define void @s_shuffle_v3p3_v4p3__7_5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -12381,19 +12381,19 @@ define void @s_shuffle_v3p3_v4p3__7_6_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -12453,21 +12453,21 @@ define void @s_shuffle_v3p3_v4p3__0_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -12508,21 +12508,21 @@ define void @s_shuffle_v3p3_v4p3__1_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -12563,21 +12563,21 @@ define void @s_shuffle_v3p3_v4p3__2_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -12618,21 +12618,21 @@ define void @s_shuffle_v3p3_v4p3__3_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -12746,18 +12746,18 @@ define void @s_shuffle_v3p3_v4p3__7_u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -12800,22 +12800,22 @@ define void @s_shuffle_v3p3_v4p3__7_0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -12856,21 +12856,21 @@ define void @s_shuffle_v3p3_v4p3__7_1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -12913,22 +12913,22 @@ define void @s_shuffle_v3p3_v4p3__7_2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -12971,22 +12971,22 @@ define void @s_shuffle_v3p3_v4p3__7_3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -13023,19 +13023,19 @@ define void @s_shuffle_v3p3_v4p3__7_4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -13072,19 +13072,19 @@ define void @s_shuffle_v3p3_v4p3__7_6_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -13144,21 +13144,21 @@ define void @s_shuffle_v3p3_v4p3__0_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__0_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__0_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -13199,21 +13199,21 @@ define void @s_shuffle_v3p3_v4p3__1_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__1_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__1_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -13254,21 +13254,21 @@ define void @s_shuffle_v3p3_v4p3__2_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__2_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__2_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -13309,21 +13309,21 @@ define void @s_shuffle_v3p3_v4p3__3_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__3_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__3_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -13462,21 +13462,21 @@ define void @s_shuffle_v3p3_v4p3__7_0_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -13517,21 +13517,21 @@ define void @s_shuffle_v3p3_v4p3__7_1_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -13572,21 +13572,21 @@ define void @s_shuffle_v3p3_v4p3__7_2_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -13627,21 +13627,21 @@ define void @s_shuffle_v3p3_v4p3__7_3_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -13678,19 +13678,19 @@ define void @s_shuffle_v3p3_v4p3__7_4_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -13744,18 +13744,18 @@ define void @s_shuffle_v3p3_v4p3__u_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__u_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__u_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -13796,21 +13796,21 @@ define void @s_shuffle_v3p3_v4p3__0_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__0_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__0_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -13853,22 +13853,22 @@ define void @s_shuffle_v3p3_v4p3__1_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__1_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__1_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -13911,22 +13911,22 @@ define void @s_shuffle_v3p3_v4p3__2_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__2_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__2_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -13969,22 +13969,22 @@ define void @s_shuffle_v3p3_v4p3__3_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__3_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__3_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -14041,19 +14041,19 @@ define void @s_shuffle_v3p3_v4p3__5_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__5_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__5_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -14090,19 +14090,19 @@ define void @s_shuffle_v3p3_v4p3__6_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__6_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__6_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -14137,18 +14137,18 @@ define void @s_shuffle_v3p3_v4p3__7_u_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -14191,22 +14191,22 @@ define void @s_shuffle_v3p3_v4p3__7_0_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -14247,21 +14247,21 @@ define void @s_shuffle_v3p3_v4p3__7_1_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -14304,22 +14304,22 @@ define void @s_shuffle_v3p3_v4p3__7_2_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -14362,22 +14362,22 @@ define void @s_shuffle_v3p3_v4p3__7_3_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -14414,19 +14414,19 @@ define void @s_shuffle_v3p3_v4p3__7_4_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> @@ -14483,19 +14483,19 @@ define void @s_shuffle_v3p3_v4p3__7_6_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v3p3_v4p3__7_6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v3p3_v4p3__7_6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <3 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4bf16.v2bf16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4bf16.v2bf16.ll index 979b46507a266..670b220b219c7 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4bf16.v2bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4bf16.v2bf16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v4bf16_v2bf16__u_u_u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v4bf16_v2bf16__0_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -79,17 +79,17 @@ define void @v_shuffle_v4bf16_v2bf16__1_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -132,17 +132,17 @@ define void @v_shuffle_v4bf16_v2bf16__3_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -181,21 +181,21 @@ define void @v_shuffle_v4bf16_v2bf16__3_0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v2, 16 -; GFX940-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v2, 16 +; GFX942-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -236,22 +236,22 @@ define void @v_shuffle_v4bf16_v2bf16__3_1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -284,17 +284,17 @@ define void @v_shuffle_v4bf16_v2bf16__3_2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -329,18 +329,18 @@ define void @v_shuffle_v4bf16_v2bf16__3_3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -381,21 +381,21 @@ define void @v_shuffle_v4bf16_v2bf16__3_3_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_3_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_3_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -438,22 +438,22 @@ define void @v_shuffle_v4bf16_v2bf16__3_3_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_3_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_alignbit_b32 v1, s0, v0, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_3_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_alignbit_b32 v1, s0, v0, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -488,19 +488,19 @@ define void @v_shuffle_v4bf16_v2bf16__3_3_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_3_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_3_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -537,19 +537,19 @@ define void @v_shuffle_v4bf16_v2bf16__3_3_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_3_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v1, s0, v0, 16 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_3_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v1, s0, v0, 16 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -592,23 +592,23 @@ define void @v_shuffle_v4bf16_v2bf16__3_3_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_3_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_3_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -651,23 +651,23 @@ define void @v_shuffle_v4bf16_v2bf16__3_3_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_3_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_3_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v3, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -704,19 +704,19 @@ define void @v_shuffle_v4bf16_v2bf16__3_3_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_3_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_3_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -753,19 +753,19 @@ define void @v_shuffle_v4bf16_v2bf16__3_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -802,19 +802,19 @@ define void @v_shuffle_v4bf16_v2bf16__u_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -850,19 +850,19 @@ define void @v_shuffle_v4bf16_v2bf16__0_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__0_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <4 x i32> zeroinitializer store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -898,19 +898,19 @@ define void @v_shuffle_v4bf16_v2bf16__1_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -946,19 +946,19 @@ define void @v_shuffle_v4bf16_v2bf16__2_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1000,22 +1000,22 @@ define void @v_shuffle_v4bf16_v2bf16__3_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -1058,22 +1058,22 @@ define void @v_shuffle_v4bf16_v2bf16__3_u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -1118,23 +1118,23 @@ define void @v_shuffle_v4bf16_v2bf16__3_1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -1177,22 +1177,22 @@ define void @v_shuffle_v4bf16_v2bf16__3_2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -1237,23 +1237,23 @@ define void @v_shuffle_v4bf16_v2bf16__3_3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -1296,22 +1296,22 @@ define void @v_shuffle_v4bf16_v2bf16__3_3_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_3_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_3_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -1354,22 +1354,22 @@ define void @v_shuffle_v4bf16_v2bf16__3_3_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_3_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_3_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -1414,23 +1414,23 @@ define void @v_shuffle_v4bf16_v2bf16__3_3_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_3_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_3_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -1465,19 +1465,19 @@ define void @v_shuffle_v4bf16_v2bf16__u_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__u_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__u_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1511,19 +1511,19 @@ define void @v_shuffle_v4bf16_v2bf16__0_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__0_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__0_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1559,19 +1559,19 @@ define void @v_shuffle_v4bf16_v2bf16__1_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__1_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__1_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1605,19 +1605,19 @@ define void @v_shuffle_v4bf16_v2bf16__2_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__2_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__2_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1659,22 +1659,22 @@ define void @v_shuffle_v4bf16_v2bf16__3_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -1717,22 +1717,22 @@ define void @v_shuffle_v4bf16_v2bf16__3_u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -1775,22 +1775,22 @@ define void @v_shuffle_v4bf16_v2bf16__3_0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -1833,22 +1833,22 @@ define void @v_shuffle_v4bf16_v2bf16__3_2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -1891,22 +1891,22 @@ define void @v_shuffle_v4bf16_v2bf16__3_3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -1947,21 +1947,21 @@ define void @v_shuffle_v4bf16_v2bf16__3_3_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_3_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_3_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -2002,21 +2002,21 @@ define void @v_shuffle_v4bf16_v2bf16__3_3_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_3_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_3_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -2061,23 +2061,23 @@ define void @v_shuffle_v4bf16_v2bf16__3_3_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_3_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_3_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v3, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -2119,16 +2119,16 @@ define void @v_shuffle_v4bf16_v2bf16__0_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__0_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2160,17 +2160,17 @@ define void @v_shuffle_v4bf16_v2bf16__1_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__1_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2217,19 +2217,19 @@ define void @v_shuffle_v4bf16_v2bf16__3_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -2266,19 +2266,19 @@ define void @v_shuffle_v4bf16_v2bf16__3_u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -2321,23 +2321,23 @@ define void @v_shuffle_v4bf16_v2bf16__3_0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -2382,23 +2382,23 @@ define void @v_shuffle_v4bf16_v2bf16__3_1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -2437,20 +2437,20 @@ define void @v_shuffle_v4bf16_v2bf16__3_3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -2487,19 +2487,19 @@ define void @v_shuffle_v4bf16_v2bf16__3_3_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_3_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_3_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -2544,23 +2544,23 @@ define void @v_shuffle_v4bf16_v2bf16__3_3_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_3_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v0, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_3_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v0, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -2603,23 +2603,23 @@ define void @v_shuffle_v4bf16_v2bf16__3_3_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_3_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v1, v3, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_3_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v1, v3, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -2654,19 +2654,19 @@ define void @v_shuffle_v4bf16_v2bf16__u_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__u_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__u_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -2711,23 +2711,23 @@ define void @v_shuffle_v4bf16_v2bf16__0_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__0_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v1 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -2770,22 +2770,22 @@ define void @v_shuffle_v4bf16_v2bf16__1_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -2820,19 +2820,19 @@ define void @v_shuffle_v4bf16_v2bf16__2_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__2_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -2869,19 +2869,19 @@ define void @v_shuffle_v4bf16_v2bf16__3_u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -2924,23 +2924,23 @@ define void @v_shuffle_v4bf16_v2bf16__3_0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -2983,22 +2983,22 @@ define void @v_shuffle_v4bf16_v2bf16__3_1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -3035,19 +3035,19 @@ define void @v_shuffle_v4bf16_v2bf16__3_2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -3082,19 +3082,19 @@ define void @v_shuffle_v4bf16_v2bf16__3_3_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_3_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_3_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -3139,23 +3139,23 @@ define void @v_shuffle_v4bf16_v2bf16__3_3_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_3_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v0, v3 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_3_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v0, v3 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -3198,23 +3198,23 @@ define void @v_shuffle_v4bf16_v2bf16__3_3_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_3_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v0, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_3_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v0, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -3249,19 +3249,19 @@ define void @v_shuffle_v4bf16_v2bf16__3_3_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v2bf16__3_3_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v2bf16__3_3_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=v"() %vec1 = call <2 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -3306,17 +3306,17 @@ define void @s_shuffle_v4bf16_v2bf16__0_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -3348,17 +3348,17 @@ define void @s_shuffle_v4bf16_v2bf16__1_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -3404,17 +3404,17 @@ define void @s_shuffle_v4bf16_v2bf16__3_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -3455,21 +3455,21 @@ define void @s_shuffle_v4bf16_v2bf16__3_0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -3512,22 +3512,22 @@ define void @s_shuffle_v4bf16_v2bf16__3_1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -3562,18 +3562,18 @@ define void @s_shuffle_v4bf16_v2bf16__3_2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -3608,18 +3608,18 @@ define void @s_shuffle_v4bf16_v2bf16__3_3_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -3660,21 +3660,21 @@ define void @s_shuffle_v4bf16_v2bf16__3_3_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_3_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_3_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -3717,22 +3717,22 @@ define void @s_shuffle_v4bf16_v2bf16__3_3_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_3_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_3_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -3767,18 +3767,18 @@ define void @s_shuffle_v4bf16_v2bf16__3_3_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_3_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_3_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -3813,18 +3813,18 @@ define void @s_shuffle_v4bf16_v2bf16__3_3_3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_3_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_3_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -3867,22 +3867,22 @@ define void @s_shuffle_v4bf16_v2bf16__3_3_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_3_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_3_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -3927,23 +3927,23 @@ define void @s_shuffle_v4bf16_v2bf16__3_3_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_3_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_3_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -3980,19 +3980,19 @@ define void @s_shuffle_v4bf16_v2bf16__3_3_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_3_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_3_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -4029,19 +4029,19 @@ define void @s_shuffle_v4bf16_v2bf16__3_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -4076,18 +4076,18 @@ define void @s_shuffle_v4bf16_v2bf16__u_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -4121,18 +4121,18 @@ define void @s_shuffle_v4bf16_v2bf16__0_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__0_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <4 x i32> zeroinitializer call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -4168,19 +4168,19 @@ define void @s_shuffle_v4bf16_v2bf16__1_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -4214,18 +4214,18 @@ define void @s_shuffle_v4bf16_v2bf16__2_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -4267,22 +4267,22 @@ define void @s_shuffle_v4bf16_v2bf16__3_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -4323,21 +4323,21 @@ define void @s_shuffle_v4bf16_v2bf16__3_u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -4382,23 +4382,23 @@ define void @s_shuffle_v4bf16_v2bf16__3_1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -4441,22 +4441,22 @@ define void @s_shuffle_v4bf16_v2bf16__3_2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -4499,22 +4499,22 @@ define void @s_shuffle_v4bf16_v2bf16__3_3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -4557,22 +4557,22 @@ define void @s_shuffle_v4bf16_v2bf16__3_3_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_3_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_lshl_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_3_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_lshl_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -4617,23 +4617,23 @@ define void @s_shuffle_v4bf16_v2bf16__3_3_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_3_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s0 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_3_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s0 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -4676,22 +4676,22 @@ define void @s_shuffle_v4bf16_v2bf16__3_3_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_3_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_3_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -4726,18 +4726,18 @@ define void @s_shuffle_v4bf16_v2bf16__u_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__u_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__u_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -4771,18 +4771,18 @@ define void @s_shuffle_v4bf16_v2bf16__0_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__0_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__0_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -4818,19 +4818,19 @@ define void @s_shuffle_v4bf16_v2bf16__1_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__1_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__1_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -4864,18 +4864,18 @@ define void @s_shuffle_v4bf16_v2bf16__2_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__2_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__2_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -4919,23 +4919,23 @@ define void @s_shuffle_v4bf16_v2bf16__3_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -4978,22 +4978,22 @@ define void @s_shuffle_v4bf16_v2bf16__3_u_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -5038,23 +5038,23 @@ define void @s_shuffle_v4bf16_v2bf16__3_0_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -5099,23 +5099,23 @@ define void @s_shuffle_v4bf16_v2bf16__3_2_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s1, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s1, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -5160,23 +5160,23 @@ define void @s_shuffle_v4bf16_v2bf16__3_3_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -5217,21 +5217,21 @@ define void @s_shuffle_v4bf16_v2bf16__3_3_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_3_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_3_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -5272,21 +5272,21 @@ define void @s_shuffle_v4bf16_v2bf16__3_3_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_3_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_3_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -5331,23 +5331,23 @@ define void @s_shuffle_v4bf16_v2bf16__3_3_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_3_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_3_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -5392,17 +5392,17 @@ define void @s_shuffle_v4bf16_v2bf16__0_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__0_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -5434,17 +5434,17 @@ define void @s_shuffle_v4bf16_v2bf16__1_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__1_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -5494,19 +5494,19 @@ define void @s_shuffle_v4bf16_v2bf16__3_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -5541,18 +5541,18 @@ define void @s_shuffle_v4bf16_v2bf16__3_u_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -5595,22 +5595,22 @@ define void @s_shuffle_v4bf16_v2bf16__3_0_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -5655,23 +5655,23 @@ define void @s_shuffle_v4bf16_v2bf16__3_1_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s2, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s2, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -5708,19 +5708,19 @@ define void @s_shuffle_v4bf16_v2bf16__3_3_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -5757,19 +5757,19 @@ define void @s_shuffle_v4bf16_v2bf16__3_3_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_3_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_lshl_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_3_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_lshl_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -5812,22 +5812,22 @@ define void @s_shuffle_v4bf16_v2bf16__3_3_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_3_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_3_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -5872,23 +5872,23 @@ define void @s_shuffle_v4bf16_v2bf16__3_3_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_3_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_3_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -5923,18 +5923,18 @@ define void @s_shuffle_v4bf16_v2bf16__u_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__u_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__u_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -5977,22 +5977,22 @@ define void @s_shuffle_v4bf16_v2bf16__0_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__0_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -6037,23 +6037,23 @@ define void @s_shuffle_v4bf16_v2bf16__1_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -6088,18 +6088,18 @@ define void @s_shuffle_v4bf16_v2bf16__2_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__2_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -6134,18 +6134,18 @@ define void @s_shuffle_v4bf16_v2bf16__3_u_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s8, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s8, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -6188,22 +6188,22 @@ define void @s_shuffle_v4bf16_v2bf16__3_0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -6248,23 +6248,23 @@ define void @s_shuffle_v4bf16_v2bf16__3_1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -6301,19 +6301,19 @@ define void @s_shuffle_v4bf16_v2bf16__3_2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -6348,18 +6348,18 @@ define void @s_shuffle_v4bf16_v2bf16__3_3_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_3_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_3_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -6402,22 +6402,22 @@ define void @s_shuffle_v4bf16_v2bf16__3_3_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_3_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_3_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -6462,23 +6462,23 @@ define void @s_shuffle_v4bf16_v2bf16__3_3_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_3_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_3_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> @@ -6513,18 +6513,18 @@ define void @s_shuffle_v4bf16_v2bf16__3_3_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v2bf16__3_3_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v2bf16__3_3_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x bfloat> asm "; def $0", "=s"() %vec1 = call <2 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <2 x bfloat> %vec0, <2 x bfloat> %vec1, <4 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4bf16.v3bf16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4bf16.v3bf16.ll index 456c8dcbcd8ef..fa422e48bbce0 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4bf16.v3bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4bf16.v3bf16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v4bf16_v3bf16__u_u_u_u(ptr addrspace(1) inreg %ptr) { @@ -39,16 +39,16 @@ define void @v_shuffle_v4bf16_v3bf16__0_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -81,17 +81,17 @@ define void @v_shuffle_v4bf16_v3bf16__1_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -124,17 +124,17 @@ define void @v_shuffle_v4bf16_v3bf16__2_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__2_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__2_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -179,17 +179,17 @@ define void @v_shuffle_v4bf16_v3bf16__4_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__4_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__4_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -224,17 +224,17 @@ define void @v_shuffle_v4bf16_v3bf16__5_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -277,22 +277,22 @@ define void @v_shuffle_v4bf16_v3bf16__5_0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -335,22 +335,22 @@ define void @v_shuffle_v4bf16_v3bf16__5_1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -393,22 +393,22 @@ define void @v_shuffle_v4bf16_v3bf16__5_2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -445,18 +445,18 @@ define void @v_shuffle_v4bf16_v3bf16__5_3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -493,18 +493,18 @@ define void @v_shuffle_v4bf16_v3bf16__5_4_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -541,18 +541,18 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -597,22 +597,22 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v2, v3, v3, s2 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v2, v3, v3, s2 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -657,22 +657,22 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_alignbit_b32 v1, s0, v0, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_alignbit_b32 v1, s0, v0, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -715,22 +715,22 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -769,20 +769,20 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -821,20 +821,20 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v3, s0, v0, 16 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v3, s0, v0, 16 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -871,18 +871,18 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -927,23 +927,23 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v3, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -990,23 +990,23 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v3, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -1051,23 +1051,23 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v3, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v3, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -1106,20 +1106,20 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -1160,20 +1160,20 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v3, s2, v1, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v3, s2, v1, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -1212,19 +1212,19 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -1263,19 +1263,19 @@ define void @v_shuffle_v4bf16_v3bf16__u_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -1312,19 +1312,19 @@ define void @v_shuffle_v4bf16_v3bf16__0_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__0_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> zeroinitializer @@ -1361,19 +1361,19 @@ define void @v_shuffle_v4bf16_v3bf16__1_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -1410,20 +1410,20 @@ define void @v_shuffle_v4bf16_v3bf16__2_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -1460,19 +1460,19 @@ define void @v_shuffle_v4bf16_v3bf16__3_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -1515,22 +1515,22 @@ define void @v_shuffle_v4bf16_v3bf16__4_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__4_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v2, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__4_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v2, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -1575,23 +1575,23 @@ define void @v_shuffle_v4bf16_v3bf16__5_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v3, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v3, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -1636,22 +1636,22 @@ define void @v_shuffle_v4bf16_v3bf16__5_u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -1698,23 +1698,23 @@ define void @v_shuffle_v4bf16_v3bf16__5_1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v2, s2, v3, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v2, s2, v3, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -1759,23 +1759,23 @@ define void @v_shuffle_v4bf16_v3bf16__5_2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v3, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v3, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -1820,22 +1820,22 @@ define void @v_shuffle_v4bf16_v3bf16__5_3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v2, v2, v3, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v2, v2, v3, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -1882,24 +1882,24 @@ define void @v_shuffle_v4bf16_v3bf16__5_4_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_bfi_b32 v2, s2, v3, v2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_bfi_b32 v2, s2, v3, v2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -1944,22 +1944,22 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -2004,22 +2004,22 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v2, v3, v3, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v2, v3, v3, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -2064,22 +2064,22 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_alignbit_b32 v1, v0, v0, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_alignbit_b32 v1, v0, v0, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -2124,22 +2124,22 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -2184,23 +2184,23 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v2, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v2, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -2245,23 +2245,23 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v2, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v2, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -2298,18 +2298,18 @@ define void @v_shuffle_v4bf16_v3bf16__u_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__u_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__u_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -2344,18 +2344,18 @@ define void @v_shuffle_v4bf16_v3bf16__0_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__0_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__0_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -2392,19 +2392,19 @@ define void @v_shuffle_v4bf16_v3bf16__1_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__1_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__1_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -2443,20 +2443,20 @@ define void @v_shuffle_v4bf16_v3bf16__2_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__2_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v2, s2, v1, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__2_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v2, s2, v1, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -2491,18 +2491,18 @@ define void @v_shuffle_v4bf16_v3bf16__3_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__3_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -2545,23 +2545,23 @@ define void @v_shuffle_v4bf16_v3bf16__4_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__4_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v2, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__4_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v2, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -2608,23 +2608,23 @@ define void @v_shuffle_v4bf16_v3bf16__5_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v2, s2, v3, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v2, s2, v3, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -2669,22 +2669,22 @@ define void @v_shuffle_v4bf16_v3bf16__5_u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -2731,23 +2731,23 @@ define void @v_shuffle_v4bf16_v3bf16__5_0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -2794,23 +2794,23 @@ define void @v_shuffle_v4bf16_v3bf16__5_2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -2857,24 +2857,24 @@ define void @v_shuffle_v4bf16_v3bf16__5_3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v2, v2, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v2, v2, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -2921,24 +2921,24 @@ define void @v_shuffle_v4bf16_v3bf16__5_4_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_bfi_b32 v2, s2, v3, v2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_bfi_b32 v2, s2, v3, v2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -2985,24 +2985,24 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -3047,22 +3047,22 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v2, v3, v3, s2 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v2, v3, v3, s2 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -3107,22 +3107,22 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v2, v3, v3, s2 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v2, v3, v3, s2 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -3169,24 +3169,24 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_bfi_b32 v1, s2, v1, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -3233,23 +3233,23 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v2, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v2, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -3296,23 +3296,23 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v2, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v2, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -3351,20 +3351,20 @@ define void @v_shuffle_v4bf16_v3bf16__u_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__u_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__u_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -3401,19 +3401,19 @@ define void @v_shuffle_v4bf16_v3bf16__0_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__0_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -3450,20 +3450,20 @@ define void @v_shuffle_v4bf16_v3bf16__1_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__1_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -3500,19 +3500,19 @@ define void @v_shuffle_v4bf16_v3bf16__2_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__2_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__2_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -3549,20 +3549,20 @@ define void @v_shuffle_v4bf16_v3bf16__3_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__3_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__3_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -3605,23 +3605,23 @@ define void @v_shuffle_v4bf16_v3bf16__4_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__4_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v2, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__4_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v2, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -3666,23 +3666,23 @@ define void @v_shuffle_v4bf16_v3bf16__5_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -3727,22 +3727,22 @@ define void @v_shuffle_v4bf16_v3bf16__5_u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -3787,22 +3787,22 @@ define void @v_shuffle_v4bf16_v3bf16__5_0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -3849,23 +3849,23 @@ define void @v_shuffle_v4bf16_v3bf16__5_1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -3910,22 +3910,22 @@ define void @v_shuffle_v4bf16_v3bf16__5_3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_perm_b32 v0, v2, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -3972,23 +3972,23 @@ define void @v_shuffle_v4bf16_v3bf16__5_4_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4033,22 +4033,22 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4093,22 +4093,22 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4153,22 +4153,22 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v0, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v0, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4213,22 +4213,22 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_alignbit_b32 v1, v1, v0, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_alignbit_b32 v1, v1, v0, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4273,23 +4273,23 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v2, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v2, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4334,23 +4334,23 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v2, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v2, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4395,16 +4395,16 @@ define void @v_shuffle_v4bf16_v3bf16__0_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__0_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -4437,17 +4437,17 @@ define void @v_shuffle_v4bf16_v3bf16__1_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -4480,17 +4480,17 @@ define void @v_shuffle_v4bf16_v3bf16__2_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__2_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -4539,19 +4539,19 @@ define void @v_shuffle_v4bf16_v3bf16__4_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__4_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__4_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4590,20 +4590,20 @@ define void @v_shuffle_v4bf16_v3bf16__5_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4641,20 +4641,20 @@ define void @v_shuffle_v4bf16_v3bf16__5_u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4699,23 +4699,23 @@ define void @v_shuffle_v4bf16_v3bf16__5_0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4762,23 +4762,23 @@ define void @v_shuffle_v4bf16_v3bf16__5_1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4823,23 +4823,23 @@ define void @v_shuffle_v4bf16_v3bf16__5_2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4880,20 +4880,20 @@ define void @v_shuffle_v4bf16_v3bf16__5_4_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v2, s2, v1, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v2, s2, v1, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4932,20 +4932,20 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -4984,20 +4984,20 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -5042,23 +5042,23 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v0, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v0, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -5103,23 +5103,23 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v2, v0, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v2, v0, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -5164,23 +5164,23 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v1, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v1, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -5219,20 +5219,20 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v3, v0, v0, 16 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v3, v0, v0, 16 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -5269,18 +5269,18 @@ define void @v_shuffle_v4bf16_v3bf16__u_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__u_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__u_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -5327,23 +5327,23 @@ define void @v_shuffle_v4bf16_v3bf16__0_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__0_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__0_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -5388,23 +5388,23 @@ define void @v_shuffle_v4bf16_v3bf16__1_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__1_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__1_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -5451,23 +5451,23 @@ define void @v_shuffle_v4bf16_v3bf16__2_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__2_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__2_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -5504,18 +5504,18 @@ define void @v_shuffle_v4bf16_v3bf16__3_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__3_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__3_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -5554,19 +5554,19 @@ define void @v_shuffle_v4bf16_v3bf16__4_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__4_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__4_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -5607,20 +5607,20 @@ define void @v_shuffle_v4bf16_v3bf16__5_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v2, s2, v1, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v2, s2, v1, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -5658,20 +5658,20 @@ define void @v_shuffle_v4bf16_v3bf16__5_u_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -5718,23 +5718,23 @@ define void @v_shuffle_v4bf16_v3bf16__5_0_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -5781,23 +5781,23 @@ define void @v_shuffle_v4bf16_v3bf16__5_1_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -5844,23 +5844,23 @@ define void @v_shuffle_v4bf16_v3bf16__5_2_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -5901,20 +5901,20 @@ define void @v_shuffle_v4bf16_v3bf16__5_3_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v1, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v1, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -5955,20 +5955,20 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -6007,20 +6007,20 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -6067,23 +6067,23 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v0, v2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v0, v2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -6130,23 +6130,23 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v0, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v0, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -6193,23 +6193,23 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v1, v2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -6248,20 +6248,20 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -6300,20 +6300,20 @@ define void @v_shuffle_v4bf16_v3bf16__u_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__u_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__u_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -6358,23 +6358,23 @@ define void @v_shuffle_v4bf16_v3bf16__0_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__0_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__0_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -6419,23 +6419,23 @@ define void @v_shuffle_v4bf16_v3bf16__1_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__1_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__1_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -6480,23 +6480,23 @@ define void @v_shuffle_v4bf16_v3bf16__2_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__2_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__2_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -6535,19 +6535,19 @@ define void @v_shuffle_v4bf16_v3bf16__3_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__3_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__3_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -6586,20 +6586,20 @@ define void @v_shuffle_v4bf16_v3bf16__4_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__4_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__4_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -6637,20 +6637,20 @@ define void @v_shuffle_v4bf16_v3bf16__5_u_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -6695,23 +6695,23 @@ define void @v_shuffle_v4bf16_v3bf16__5_0_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -6758,23 +6758,23 @@ define void @v_shuffle_v4bf16_v3bf16__5_1_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -6819,23 +6819,23 @@ define void @v_shuffle_v4bf16_v3bf16__5_2_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -6874,19 +6874,19 @@ define void @v_shuffle_v4bf16_v3bf16__5_3_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -6927,20 +6927,20 @@ define void @v_shuffle_v4bf16_v3bf16__5_4_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -6979,19 +6979,19 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -7036,23 +7036,23 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v0, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v0, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -7097,23 +7097,23 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v3, v0, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v3, v0, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -7158,23 +7158,23 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v1, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v1, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -7213,20 +7213,20 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v0, s2 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v0, s2 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -7265,20 +7265,20 @@ define void @v_shuffle_v4bf16_v3bf16__5_5_4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v3bf16__5_5_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v3, v1, v0, 16 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v3bf16__5_5_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v3, v1, v0, 16 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -7326,17 +7326,17 @@ define void @s_shuffle_v4bf16_v3bf16__0_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -7369,17 +7369,17 @@ define void @s_shuffle_v4bf16_v3bf16__1_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -7412,17 +7412,17 @@ define void @s_shuffle_v4bf16_v3bf16__2_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__2_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__2_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -7470,17 +7470,17 @@ define void @s_shuffle_v4bf16_v3bf16__4_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__4_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__4_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -7515,17 +7515,17 @@ define void @s_shuffle_v4bf16_v3bf16__5_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -7566,20 +7566,20 @@ define void @s_shuffle_v4bf16_v3bf16__5_0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -7622,21 +7622,21 @@ define void @s_shuffle_v4bf16_v3bf16__5_1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -7677,20 +7677,20 @@ define void @s_shuffle_v4bf16_v3bf16__5_2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -7725,17 +7725,17 @@ define void @s_shuffle_v4bf16_v3bf16__5_3_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -7772,18 +7772,18 @@ define void @s_shuffle_v4bf16_v3bf16__5_4_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -7818,17 +7818,17 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -7871,21 +7871,21 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -7928,21 +7928,21 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -7983,20 +7983,20 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -8033,18 +8033,18 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -8081,18 +8081,18 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -8156,21 +8156,21 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -8215,22 +8215,22 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -8273,21 +8273,21 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -8324,18 +8324,18 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -8374,19 +8374,19 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -8423,18 +8423,18 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -8471,18 +8471,18 @@ define void @s_shuffle_v4bf16_v3bf16__u_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -8517,18 +8517,18 @@ define void @s_shuffle_v4bf16_v3bf16__0_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__0_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> zeroinitializer @@ -8565,19 +8565,19 @@ define void @s_shuffle_v4bf16_v3bf16__1_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -8612,18 +8612,18 @@ define void @s_shuffle_v4bf16_v3bf16__2_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -8658,18 +8658,18 @@ define void @s_shuffle_v4bf16_v3bf16__3_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -8712,22 +8712,22 @@ define void @s_shuffle_v4bf16_v3bf16__4_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__4_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__4_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -8770,21 +8770,21 @@ define void @s_shuffle_v4bf16_v3bf16__5_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -8827,21 +8827,21 @@ define void @s_shuffle_v4bf16_v3bf16__5_u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -8886,22 +8886,22 @@ define void @s_shuffle_v4bf16_v3bf16__5_1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -8944,21 +8944,21 @@ define void @s_shuffle_v4bf16_v3bf16__5_2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -9001,21 +9001,21 @@ define void @s_shuffle_v4bf16_v3bf16__5_3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -9060,22 +9060,22 @@ define void @s_shuffle_v4bf16_v3bf16__5_4_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -9118,21 +9118,21 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -9175,21 +9175,21 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -9234,22 +9234,22 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -9292,21 +9292,21 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -9349,21 +9349,21 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -9408,22 +9408,22 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -9460,18 +9460,18 @@ define void @s_shuffle_v4bf16_v3bf16__u_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__u_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__u_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -9506,18 +9506,18 @@ define void @s_shuffle_v4bf16_v3bf16__0_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__0_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__0_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -9554,19 +9554,19 @@ define void @s_shuffle_v4bf16_v3bf16__1_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__1_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__1_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -9603,19 +9603,19 @@ define void @s_shuffle_v4bf16_v3bf16__2_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__2_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__2_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -9650,18 +9650,18 @@ define void @s_shuffle_v4bf16_v3bf16__3_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__3_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -9706,23 +9706,23 @@ define void @s_shuffle_v4bf16_v3bf16__4_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__4_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__4_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -9767,22 +9767,22 @@ define void @s_shuffle_v4bf16_v3bf16__5_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -9827,22 +9827,22 @@ define void @s_shuffle_v4bf16_v3bf16__5_u_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -9887,22 +9887,22 @@ define void @s_shuffle_v4bf16_v3bf16__5_0_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -9947,22 +9947,22 @@ define void @s_shuffle_v4bf16_v3bf16__5_2_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -10007,22 +10007,22 @@ define void @s_shuffle_v4bf16_v3bf16__5_3_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -10069,23 +10069,23 @@ define void @s_shuffle_v4bf16_v3bf16__5_4_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -10130,22 +10130,22 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -10188,21 +10188,21 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -10245,21 +10245,21 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -10304,22 +10304,22 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -10364,22 +10364,22 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -10426,23 +10426,23 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -10479,18 +10479,18 @@ define void @s_shuffle_v4bf16_v3bf16__u_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__u_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__u_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -10525,18 +10525,18 @@ define void @s_shuffle_v4bf16_v3bf16__0_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__0_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -10573,19 +10573,19 @@ define void @s_shuffle_v4bf16_v3bf16__1_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__1_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -10620,18 +10620,18 @@ define void @s_shuffle_v4bf16_v3bf16__2_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__2_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__2_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -10666,18 +10666,18 @@ define void @s_shuffle_v4bf16_v3bf16__3_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__3_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__3_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -10720,22 +10720,22 @@ define void @s_shuffle_v4bf16_v3bf16__4_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__4_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__4_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -10778,21 +10778,21 @@ define void @s_shuffle_v4bf16_v3bf16__5_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -10835,21 +10835,21 @@ define void @s_shuffle_v4bf16_v3bf16__5_u_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -10892,21 +10892,21 @@ define void @s_shuffle_v4bf16_v3bf16__5_0_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -10951,22 +10951,22 @@ define void @s_shuffle_v4bf16_v3bf16__5_1_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -11009,21 +11009,21 @@ define void @s_shuffle_v4bf16_v3bf16__5_3_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -11068,22 +11068,22 @@ define void @s_shuffle_v4bf16_v3bf16__5_4_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -11126,21 +11126,21 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -11183,21 +11183,21 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s9, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s9, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -11240,21 +11240,21 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -11299,22 +11299,22 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -11357,21 +11357,21 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -11416,22 +11416,22 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -11479,17 +11479,17 @@ define void @s_shuffle_v4bf16_v3bf16__0_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__0_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -11522,17 +11522,17 @@ define void @s_shuffle_v4bf16_v3bf16__1_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -11565,17 +11565,17 @@ define void @s_shuffle_v4bf16_v3bf16__2_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__2_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> %shuf = shufflevector <3 x bfloat> %extract3, <3 x bfloat> poison, <4 x i32> @@ -11627,19 +11627,19 @@ define void @s_shuffle_v4bf16_v3bf16__4_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__4_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__4_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -11676,18 +11676,18 @@ define void @s_shuffle_v4bf16_v3bf16__5_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -11724,18 +11724,18 @@ define void @s_shuffle_v4bf16_v3bf16__5_u_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -11778,21 +11778,21 @@ define void @s_shuffle_v4bf16_v3bf16__5_0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -11837,22 +11837,22 @@ define void @s_shuffle_v4bf16_v3bf16__5_1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -11895,21 +11895,21 @@ define void @s_shuffle_v4bf16_v3bf16__5_2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -11948,19 +11948,19 @@ define void @s_shuffle_v4bf16_v3bf16__5_4_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -11997,18 +11997,18 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -12045,18 +12045,18 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -12099,21 +12099,21 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -12158,22 +12158,22 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -12216,21 +12216,21 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -12269,19 +12269,19 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -12318,18 +12318,18 @@ define void @s_shuffle_v4bf16_v3bf16__u_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__u_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__u_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -12374,22 +12374,22 @@ define void @s_shuffle_v4bf16_v3bf16__0_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__0_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__0_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -12436,23 +12436,23 @@ define void @s_shuffle_v4bf16_v3bf16__1_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__1_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__1_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -12497,22 +12497,22 @@ define void @s_shuffle_v4bf16_v3bf16__2_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__2_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__2_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -12549,18 +12549,18 @@ define void @s_shuffle_v4bf16_v3bf16__3_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__3_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__3_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -12599,19 +12599,19 @@ define void @s_shuffle_v4bf16_v3bf16__4_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__4_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__4_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -12650,19 +12650,19 @@ define void @s_shuffle_v4bf16_v3bf16__5_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -12701,19 +12701,19 @@ define void @s_shuffle_v4bf16_v3bf16__5_u_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -12758,22 +12758,22 @@ define void @s_shuffle_v4bf16_v3bf16__5_0_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -12820,23 +12820,23 @@ define void @s_shuffle_v4bf16_v3bf16__5_1_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -12881,22 +12881,22 @@ define void @s_shuffle_v4bf16_v3bf16__5_2_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -12935,19 +12935,19 @@ define void @s_shuffle_v4bf16_v3bf16__5_3_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -12986,19 +12986,19 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -13035,18 +13035,18 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -13091,22 +13091,22 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -13153,23 +13153,23 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -13214,22 +13214,22 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -13266,18 +13266,18 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -13314,18 +13314,18 @@ define void @s_shuffle_v4bf16_v3bf16__u_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__u_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__u_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -13368,21 +13368,21 @@ define void @s_shuffle_v4bf16_v3bf16__0_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__0_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__0_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -13427,22 +13427,22 @@ define void @s_shuffle_v4bf16_v3bf16__1_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__1_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__1_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -13485,21 +13485,21 @@ define void @s_shuffle_v4bf16_v3bf16__2_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__2_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__2_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -13536,18 +13536,18 @@ define void @s_shuffle_v4bf16_v3bf16__3_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__3_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__3_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -13586,19 +13586,19 @@ define void @s_shuffle_v4bf16_v3bf16__4_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__4_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__4_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -13635,18 +13635,18 @@ define void @s_shuffle_v4bf16_v3bf16__5_u_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -13689,21 +13689,21 @@ define void @s_shuffle_v4bf16_v3bf16__5_0_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -13748,22 +13748,22 @@ define void @s_shuffle_v4bf16_v3bf16__5_1_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -13806,21 +13806,21 @@ define void @s_shuffle_v4bf16_v3bf16__5_2_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -13857,18 +13857,18 @@ define void @s_shuffle_v4bf16_v3bf16__5_3_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -13907,19 +13907,19 @@ define void @s_shuffle_v4bf16_v3bf16__5_4_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -13956,18 +13956,18 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s9, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s9, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -14010,21 +14010,21 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -14069,22 +14069,22 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -14127,21 +14127,21 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -14178,18 +14178,18 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> @@ -14228,19 +14228,19 @@ define void @s_shuffle_v4bf16_v3bf16__5_5_4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v3bf16__5_5_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v3bf16__5_5_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %extract3 = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <3 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4bf16.v4bf16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4bf16.v4bf16.ll index 9a1f22f167d16..ab297c02fe3b5 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4bf16.v4bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4bf16.v4bf16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v4bf16_v4bf16__u_u_u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v4bf16_v4bf16__0_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -79,17 +79,17 @@ define void @v_shuffle_v4bf16_v4bf16__1_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -121,17 +121,17 @@ define void @v_shuffle_v4bf16_v4bf16__2_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__2_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__2_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -163,17 +163,17 @@ define void @v_shuffle_v4bf16_v4bf16__3_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__3_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__3_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -216,17 +216,17 @@ define void @v_shuffle_v4bf16_v4bf16__5_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__5_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__5_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -259,17 +259,17 @@ define void @v_shuffle_v4bf16_v4bf16__6_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__6_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__6_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -302,17 +302,17 @@ define void @v_shuffle_v4bf16_v4bf16__7_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -351,21 +351,21 @@ define void @v_shuffle_v4bf16_v4bf16__7_0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -406,22 +406,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -460,21 +460,21 @@ define void @v_shuffle_v4bf16_v4bf16__7_2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -515,22 +515,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -563,17 +563,17 @@ define void @v_shuffle_v4bf16_v4bf16__7_4_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -608,18 +608,18 @@ define void @v_shuffle_v4bf16_v4bf16__7_5_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -652,17 +652,17 @@ define void @v_shuffle_v4bf16_v4bf16__7_6_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_6_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_6_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -697,18 +697,18 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -751,22 +751,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v2, v3, v3, s2 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v2, v3, v3, s2 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -809,22 +809,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_alignbit_b32 v1, s0, v0, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_alignbit_b32 v1, s0, v0, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -865,22 +865,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -923,22 +923,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_alignbit_b32 v1, s0, v1, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -975,20 +975,20 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -1025,20 +1025,20 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v3, s0, v0, 16 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v3, s0, v0, 16 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -1073,18 +1073,18 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_6_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -1121,20 +1121,20 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_7_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v3, s0, v1, 16 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v3, s0, v1, 16 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -1177,23 +1177,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_7_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v3, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v3, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -1236,23 +1236,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_7_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v3, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -1295,23 +1295,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_7_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -1354,23 +1354,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_7_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v3, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v3, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -1407,20 +1407,20 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_7_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v3, v0, v1, 16 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v3, v0, v1, 16 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -1457,20 +1457,20 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_7_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -1507,19 +1507,19 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_7_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -1556,19 +1556,19 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -1605,19 +1605,19 @@ define void @v_shuffle_v4bf16_v4bf16__u_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1653,19 +1653,19 @@ define void @v_shuffle_v4bf16_v4bf16__0_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__0_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> zeroinitializer store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1701,19 +1701,19 @@ define void @v_shuffle_v4bf16_v4bf16__1_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1749,20 +1749,20 @@ define void @v_shuffle_v4bf16_v4bf16__2_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1798,20 +1798,20 @@ define void @v_shuffle_v4bf16_v4bf16__3_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v0, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v0, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1847,19 +1847,19 @@ define void @v_shuffle_v4bf16_v4bf16__4_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__4_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__4_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1901,22 +1901,22 @@ define void @v_shuffle_v4bf16_v4bf16__5_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__5_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v2, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__5_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v2, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -1959,23 +1959,23 @@ define void @v_shuffle_v4bf16_v4bf16__6_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__6_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v3, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__6_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v3, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -2018,22 +2018,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -2076,22 +2076,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -2136,23 +2136,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -2195,23 +2195,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v5, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v4, v1, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v5, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v4, v1, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -2256,23 +2256,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -2315,22 +2315,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_4_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -2375,24 +2375,24 @@ define void @v_shuffle_v4bf16_v4bf16__7_5_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v2, v2, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v2, v2, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -2435,22 +2435,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_6_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_6_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_6_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -2495,24 +2495,24 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -2555,22 +2555,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v2, v3, v3, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v2, v3, v3, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -2613,22 +2613,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_alignbit_b32 v1, v0, v0, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_alignbit_b32 v1, v0, v0, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -2673,24 +2673,24 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v0, v1, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -2733,22 +2733,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_alignbit_b32 v1, v0, v1, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_alignbit_b32 v1, v0, v1, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -2793,23 +2793,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v2, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v2, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -2852,23 +2852,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v2, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v2, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -2913,23 +2913,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_6_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -2964,18 +2964,18 @@ define void @v_shuffle_v4bf16_v4bf16__u_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__u_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__u_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3009,18 +3009,18 @@ define void @v_shuffle_v4bf16_v4bf16__0_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__0_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__0_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3056,19 +3056,19 @@ define void @v_shuffle_v4bf16_v4bf16__1_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__1_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__1_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3106,20 +3106,20 @@ define void @v_shuffle_v4bf16_v4bf16__2_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__2_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v2, s2, v1, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__2_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v2, s2, v1, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3155,20 +3155,20 @@ define void @v_shuffle_v4bf16_v4bf16__3_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__3_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3202,18 +3202,18 @@ define void @v_shuffle_v4bf16_v4bf16__4_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__4_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__4_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3255,23 +3255,23 @@ define void @v_shuffle_v4bf16_v4bf16__5_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__5_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v2, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__5_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v2, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -3316,23 +3316,23 @@ define void @v_shuffle_v4bf16_v4bf16__6_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__6_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v2, s2, v3, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__6_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v2, s2, v3, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -3375,23 +3375,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v3, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v3, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -3434,22 +3434,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -3492,22 +3492,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -3550,23 +3550,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v5, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v4, v1, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v5, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v4, v1, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -3609,23 +3609,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v3, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v3, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -3668,22 +3668,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_4_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -3726,22 +3726,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_5_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v2, v2, v3, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v2, v2, v3, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -3784,22 +3784,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_6_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_6_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_6_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -3842,22 +3842,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -3900,22 +3900,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v2, v3, v3, s2 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v2, v3, v3, s2 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -3958,22 +3958,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v2, v3, v3, s2 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v2, v3, v3, s2 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -4018,24 +4018,24 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_bfi_b32 v1, s2, v1, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -4078,22 +4078,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -4138,23 +4138,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v2, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v2, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -4197,23 +4197,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v2, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v2, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -4258,23 +4258,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_6_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v3, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -4311,20 +4311,20 @@ define void @v_shuffle_v4bf16_v4bf16__u_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__u_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__u_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4360,19 +4360,19 @@ define void @v_shuffle_v4bf16_v4bf16__0_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__0_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4408,20 +4408,20 @@ define void @v_shuffle_v4bf16_v4bf16__1_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__1_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4457,19 +4457,19 @@ define void @v_shuffle_v4bf16_v4bf16__2_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__2_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__2_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4505,20 +4505,20 @@ define void @v_shuffle_v4bf16_v4bf16__3_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__3_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__3_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4554,20 +4554,20 @@ define void @v_shuffle_v4bf16_v4bf16__4_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__4_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__4_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4609,23 +4609,23 @@ define void @v_shuffle_v4bf16_v4bf16__5_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__5_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v2, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__5_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v2, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -4668,23 +4668,23 @@ define void @v_shuffle_v4bf16_v4bf16__6_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__6_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__6_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -4727,23 +4727,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v5, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v4, v1, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v5, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v4, v1, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -4786,22 +4786,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -4844,22 +4844,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -4904,23 +4904,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -4965,23 +4965,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -5024,22 +5024,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_4_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -5084,23 +5084,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_5_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -5143,22 +5143,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_6_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_6_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_6_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -5203,23 +5203,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -5262,22 +5262,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -5322,24 +5322,24 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v1, v0, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v1, v0, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -5382,22 +5382,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v2, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v3, v1, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v2, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v3, v1, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -5440,22 +5440,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_alignbit_b32 v1, v1, v1, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -5500,23 +5500,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v2, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v2, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -5559,23 +5559,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v2, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v2, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -5620,23 +5620,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_6_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -5672,20 +5672,20 @@ define void @v_shuffle_v4bf16_v4bf16__u_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__u_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__u_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5723,20 +5723,20 @@ define void @v_shuffle_v4bf16_v4bf16__0_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__0_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v1 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5772,19 +5772,19 @@ define void @v_shuffle_v4bf16_v4bf16__1_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5819,20 +5819,20 @@ define void @v_shuffle_v4bf16_v4bf16__2_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__2_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5868,19 +5868,19 @@ define void @v_shuffle_v4bf16_v4bf16__3_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__3_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__3_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5915,20 +5915,20 @@ define void @v_shuffle_v4bf16_v4bf16__4_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__4_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__4_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5970,23 +5970,23 @@ define void @v_shuffle_v4bf16_v4bf16__5_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__5_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v2, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__5_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v2, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -6031,23 +6031,23 @@ define void @v_shuffle_v4bf16_v4bf16__6_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__6_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v1 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__6_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v1 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -6090,23 +6090,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -6149,22 +6149,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -6207,22 +6207,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -6265,22 +6265,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -6323,23 +6323,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v5, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v4, v1, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v5, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v4, v1, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -6382,22 +6382,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_4_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -6440,22 +6440,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_5_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_perm_b32 v0, v2, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -6498,22 +6498,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_6_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_6_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_6_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -6556,22 +6556,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -6612,22 +6612,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -6672,24 +6672,24 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_bfi_b32 v1, s2, v0, v1 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_bfi_b32 v1, s2, v0, v1 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -6732,22 +6732,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v0, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v0, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -6788,22 +6788,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -6848,23 +6848,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v2, v1 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v2, v1 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -6907,23 +6907,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v2, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v2, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -6968,23 +6968,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_6_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v1 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v3, v1 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -7026,16 +7026,16 @@ define void @v_shuffle_v4bf16_v4bf16__0_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__0_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__0_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7067,17 +7067,17 @@ define void @v_shuffle_v4bf16_v4bf16__1_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__1_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__1_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7109,17 +7109,17 @@ define void @v_shuffle_v4bf16_v4bf16__2_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__2_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__2_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7151,17 +7151,17 @@ define void @v_shuffle_v4bf16_v4bf16__3_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__3_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__3_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> store <4 x bfloat> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7208,19 +7208,19 @@ define void @v_shuffle_v4bf16_v4bf16__5_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__5_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__5_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -7257,20 +7257,20 @@ define void @v_shuffle_v4bf16_v4bf16__6_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__6_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__6_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -7307,20 +7307,20 @@ define void @v_shuffle_v4bf16_v4bf16__7_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v0, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v0, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -7357,20 +7357,20 @@ define void @v_shuffle_v4bf16_v4bf16__7_u_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v2, s0, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v2, s0, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -7413,23 +7413,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_0_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -7474,23 +7474,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_1_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -7533,23 +7533,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_2_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v5, v2, v2, s2 -; GFX940-NEXT: v_alignbit_b32 v4, v1, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v5, v2, v2, s2 +; GFX942-NEXT: v_alignbit_b32 v4, v1, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -7594,23 +7594,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_3_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -7649,20 +7649,20 @@ define void @v_shuffle_v4bf16_v4bf16__7_5_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v1, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v1, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -7699,20 +7699,20 @@ define void @v_shuffle_v4bf16_v4bf16__7_6_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_6_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_6_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -7751,20 +7751,20 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -7801,20 +7801,20 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -7859,23 +7859,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v0, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v0, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -7918,23 +7918,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v2, v0, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v2, v0, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -7979,23 +7979,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v1, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v1, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -8038,23 +8038,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v2, v1, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v2, v1, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -8091,20 +8091,20 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v3, v0, v0, 16 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v3, v0, v0, 16 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -8143,20 +8143,20 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_6_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v1, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v1, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -8191,18 +8191,18 @@ define void @v_shuffle_v4bf16_v4bf16__u_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__u_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__u_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -8247,23 +8247,23 @@ define void @v_shuffle_v4bf16_v4bf16__0_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__0_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__0_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -8306,23 +8306,23 @@ define void @v_shuffle_v4bf16_v4bf16__1_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__1_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__1_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -8367,23 +8367,23 @@ define void @v_shuffle_v4bf16_v4bf16__2_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__2_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__2_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -8426,23 +8426,23 @@ define void @v_shuffle_v4bf16_v4bf16__3_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__3_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v1, s2 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__3_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v1, s2 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -8477,18 +8477,18 @@ define void @v_shuffle_v4bf16_v4bf16__4_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__4_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__4_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -8525,19 +8525,19 @@ define void @v_shuffle_v4bf16_v4bf16__5_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__5_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__5_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -8576,20 +8576,20 @@ define void @v_shuffle_v4bf16_v4bf16__6_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__6_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v2, s2, v1, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__6_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v2, s2, v1, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -8626,20 +8626,20 @@ define void @v_shuffle_v4bf16_v4bf16__7_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -8676,20 +8676,20 @@ define void @v_shuffle_v4bf16_v4bf16__7_u_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v2, s0, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v2, s0, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -8732,23 +8732,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_0_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -8791,23 +8791,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_1_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -8850,23 +8850,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_2_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v5, v2, v2, s2 -; GFX940-NEXT: v_alignbit_b32 v4, v1, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v5, v2, v2, s2 +; GFX942-NEXT: v_alignbit_b32 v4, v1, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -8909,23 +8909,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_3_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -8962,20 +8962,20 @@ define void @v_shuffle_v4bf16_v4bf16__7_4_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v0, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v0, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -9012,20 +9012,20 @@ define void @v_shuffle_v4bf16_v4bf16__7_6_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_6_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_6_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -9062,20 +9062,20 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -9112,20 +9112,20 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -9170,23 +9170,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v0, v2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v0, v2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -9229,23 +9229,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v0, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v0, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -9290,23 +9290,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v1, v2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -9349,23 +9349,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v1, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v1, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -9402,20 +9402,20 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -9454,20 +9454,20 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_6_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v3, s2, v1, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v3, s2, v1, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -9504,20 +9504,20 @@ define void @v_shuffle_v4bf16_v4bf16__u_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__u_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__u_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -9560,23 +9560,23 @@ define void @v_shuffle_v4bf16_v4bf16__0_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__0_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__0_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -9619,23 +9619,23 @@ define void @v_shuffle_v4bf16_v4bf16__1_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__1_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__1_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -9678,23 +9678,23 @@ define void @v_shuffle_v4bf16_v4bf16__2_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__2_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__2_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -9737,23 +9737,23 @@ define void @v_shuffle_v4bf16_v4bf16__3_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__3_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v5, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v4, v3, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__3_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v5, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v4, v3, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -9790,19 +9790,19 @@ define void @v_shuffle_v4bf16_v4bf16__4_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__4_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__4_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -9839,20 +9839,20 @@ define void @v_shuffle_v4bf16_v4bf16__5_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__5_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__5_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -9889,19 +9889,19 @@ define void @v_shuffle_v4bf16_v4bf16__6_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__6_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__6_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -9938,20 +9938,20 @@ define void @v_shuffle_v4bf16_v4bf16__7_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -9988,19 +9988,19 @@ define void @v_shuffle_v4bf16_v4bf16__7_u_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_u_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_u_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -10043,23 +10043,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_0_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_0_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_0_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -10104,23 +10104,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_1_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_1_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_1_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -10163,23 +10163,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_2_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_2_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v5, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v4, v1, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_2_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v5, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v4, v1, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -10224,23 +10224,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_3_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_3_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_3_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -10277,19 +10277,19 @@ define void @v_shuffle_v4bf16_v4bf16__7_4_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_4_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_4_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v1, 16 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -10328,20 +10328,20 @@ define void @v_shuffle_v4bf16_v4bf16__7_5_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_5_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_5_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -10380,20 +10380,20 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -10430,19 +10430,19 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_u_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -10487,23 +10487,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_0_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v0, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v0, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -10546,22 +10546,22 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_1_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v2, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v3, v3, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v2, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v3, v3, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -10606,23 +10606,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_2_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v1, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v1, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -10665,23 +10665,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_3_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v1, v3, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v1, v3, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -10720,20 +10720,20 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_4_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v0, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v0, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -10770,20 +10770,20 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_5_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v3, v1, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v3, v1, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -10819,20 +10819,20 @@ define void @v_shuffle_v4bf16_v4bf16__u_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__u_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__u_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -10877,23 +10877,23 @@ define void @v_shuffle_v4bf16_v4bf16__0_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__0_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v3 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__0_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v3 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -10936,23 +10936,23 @@ define void @v_shuffle_v4bf16_v4bf16__1_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__1_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__1_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -10997,23 +10997,23 @@ define void @v_shuffle_v4bf16_v4bf16__2_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__2_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v3 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__2_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v3 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -11056,23 +11056,23 @@ define void @v_shuffle_v4bf16_v4bf16__3_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__3_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__3_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -11111,20 +11111,20 @@ define void @v_shuffle_v4bf16_v4bf16__4_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__4_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__4_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v1 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -11161,19 +11161,19 @@ define void @v_shuffle_v4bf16_v4bf16__5_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__5_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__5_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -11209,20 +11209,20 @@ define void @v_shuffle_v4bf16_v4bf16__6_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__6_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__6_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -11259,20 +11259,20 @@ define void @v_shuffle_v4bf16_v4bf16__7_u_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_u_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v2, s0, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_u_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v2, s0, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -11315,23 +11315,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_0_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_0_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_0_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -11374,23 +11374,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_1_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_1_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_1_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -11433,23 +11433,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_2_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_2_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v5, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v4, v1, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_2_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v5, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v4, v1, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -11492,23 +11492,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_3_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_3_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_3_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -11545,20 +11545,20 @@ define void @v_shuffle_v4bf16_v4bf16__7_4_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_4_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v0, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_4_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v0, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -11595,19 +11595,19 @@ define void @v_shuffle_v4bf16_v4bf16__7_5_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_5_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_5_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -11644,20 +11644,20 @@ define void @v_shuffle_v4bf16_v4bf16__7_6_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_6_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_6_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -11692,18 +11692,18 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_u_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -11748,23 +11748,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_0_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v0, v3 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v0, v3 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -11807,23 +11807,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_1_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v0, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v0, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -11868,23 +11868,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_2_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v3 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v1, v3 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -11927,23 +11927,23 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_3_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v1, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v1, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -11982,20 +11982,20 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_4_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v3, s2, v0, v1 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v3, s2, v0, v1 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -12032,20 +12032,20 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_5_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v0, s2 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v0, s2 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -12080,18 +12080,18 @@ define void @v_shuffle_v4bf16_v4bf16__7_7_6_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4bf16_v4bf16__7_7_6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4bf16_v4bf16__7_7_6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=v"() %vec1 = call <4 x bfloat> asm "; def $0", "=v"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -12136,17 +12136,17 @@ define void @s_shuffle_v4bf16_v4bf16__0_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -12178,17 +12178,17 @@ define void @s_shuffle_v4bf16_v4bf16__1_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -12220,17 +12220,17 @@ define void @s_shuffle_v4bf16_v4bf16__2_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__2_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__2_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -12262,17 +12262,17 @@ define void @s_shuffle_v4bf16_v4bf16__3_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__3_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__3_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -12318,17 +12318,17 @@ define void @s_shuffle_v4bf16_v4bf16__5_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__5_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__5_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -12361,17 +12361,17 @@ define void @s_shuffle_v4bf16_v4bf16__6_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__6_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__6_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -12404,17 +12404,17 @@ define void @s_shuffle_v4bf16_v4bf16__7_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -12455,21 +12455,21 @@ define void @s_shuffle_v4bf16_v4bf16__7_0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -12512,22 +12512,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -12568,21 +12568,21 @@ define void @s_shuffle_v4bf16_v4bf16__7_2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -12625,22 +12625,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_3_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -12675,18 +12675,18 @@ define void @s_shuffle_v4bf16_v4bf16__7_4_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -12723,19 +12723,19 @@ define void @s_shuffle_v4bf16_v4bf16__7_5_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -12770,18 +12770,18 @@ define void @s_shuffle_v4bf16_v4bf16__7_6_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_6_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_6_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -12816,18 +12816,18 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -12870,22 +12870,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -12928,22 +12928,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -12984,21 +12984,21 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -13041,22 +13041,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -13093,19 +13093,19 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -13142,19 +13142,19 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -13189,18 +13189,18 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_6_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -13235,18 +13235,18 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_7_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -13289,22 +13289,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_7_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -13349,23 +13349,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_7_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -13408,22 +13408,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_7_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -13468,23 +13468,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_7_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -13521,19 +13521,19 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_7_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -13572,20 +13572,20 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_7_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -13622,19 +13622,19 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_7_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -13671,19 +13671,19 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -13718,18 +13718,18 @@ define void @s_shuffle_v4bf16_v4bf16__u_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -13763,18 +13763,18 @@ define void @s_shuffle_v4bf16_v4bf16__0_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__0_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> zeroinitializer call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -13810,19 +13810,19 @@ define void @s_shuffle_v4bf16_v4bf16__1_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -13856,18 +13856,18 @@ define void @s_shuffle_v4bf16_v4bf16__2_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -13903,19 +13903,19 @@ define void @s_shuffle_v4bf16_v4bf16__3_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -13949,18 +13949,18 @@ define void @s_shuffle_v4bf16_v4bf16__4_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__4_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__4_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -14002,22 +14002,22 @@ define void @s_shuffle_v4bf16_v4bf16__5_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__5_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__5_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -14058,21 +14058,21 @@ define void @s_shuffle_v4bf16_v4bf16__6_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__6_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__6_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -14115,22 +14115,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -14171,21 +14171,21 @@ define void @s_shuffle_v4bf16_v4bf16__7_u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -14230,23 +14230,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -14289,22 +14289,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -14349,23 +14349,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -14408,22 +14408,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_4_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -14468,23 +14468,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_5_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -14527,22 +14527,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_6_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_6_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_6_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -14585,22 +14585,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -14643,22 +14643,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_lshl_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_lshl_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -14703,23 +14703,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -14762,22 +14762,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -14822,23 +14822,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -14881,22 +14881,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s0 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s0 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -14941,23 +14941,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -15000,22 +15000,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_6_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s0 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s0 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -15050,18 +15050,18 @@ define void @s_shuffle_v4bf16_v4bf16__u_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__u_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__u_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -15095,18 +15095,18 @@ define void @s_shuffle_v4bf16_v4bf16__0_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__0_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__0_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -15142,19 +15142,19 @@ define void @s_shuffle_v4bf16_v4bf16__1_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__1_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__1_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -15190,19 +15190,19 @@ define void @s_shuffle_v4bf16_v4bf16__2_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__2_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__2_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -15240,20 +15240,20 @@ define void @s_shuffle_v4bf16_v4bf16__3_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__3_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -15287,18 +15287,18 @@ define void @s_shuffle_v4bf16_v4bf16__4_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__4_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__4_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -15342,23 +15342,23 @@ define void @s_shuffle_v4bf16_v4bf16__5_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__5_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__5_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -15401,22 +15401,22 @@ define void @s_shuffle_v4bf16_v4bf16__6_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__6_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__6_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -15461,23 +15461,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -15520,22 +15520,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_u_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_lshr_b32 s8, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_lshr_b32 s8, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -15580,23 +15580,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_0_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -15641,23 +15641,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_2_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -15704,24 +15704,24 @@ define void @s_shuffle_v4bf16_v4bf16__7_3_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -15766,23 +15766,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_4_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -15829,24 +15829,24 @@ define void @s_shuffle_v4bf16_v4bf16__7_5_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -15891,23 +15891,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_6_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_6_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_6_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -15952,23 +15952,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -16011,22 +16011,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -16069,22 +16069,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -16129,23 +16129,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -16192,24 +16192,24 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -16254,23 +16254,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s0 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s0 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -16317,24 +16317,24 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -16379,23 +16379,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_6_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s0 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s0 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -16430,18 +16430,18 @@ define void @s_shuffle_v4bf16_v4bf16__u_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__u_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__u_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -16475,18 +16475,18 @@ define void @s_shuffle_v4bf16_v4bf16__0_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__0_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -16522,19 +16522,19 @@ define void @s_shuffle_v4bf16_v4bf16__1_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__1_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -16568,18 +16568,18 @@ define void @s_shuffle_v4bf16_v4bf16__2_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__2_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__2_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -16615,19 +16615,19 @@ define void @s_shuffle_v4bf16_v4bf16__3_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__3_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__3_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -16661,18 +16661,18 @@ define void @s_shuffle_v4bf16_v4bf16__4_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__4_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__4_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -16714,22 +16714,22 @@ define void @s_shuffle_v4bf16_v4bf16__5_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__5_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__5_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -16770,21 +16770,21 @@ define void @s_shuffle_v4bf16_v4bf16__6_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__6_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__6_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -16827,22 +16827,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -16883,21 +16883,21 @@ define void @s_shuffle_v4bf16_v4bf16__7_u_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -16940,22 +16940,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_0_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -17000,23 +17000,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_1_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -17061,23 +17061,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_3_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -17120,22 +17120,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_4_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -17180,23 +17180,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_5_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -17239,22 +17239,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_6_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_6_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_6_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -17297,22 +17297,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -17355,22 +17355,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_lshl_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_lshl_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -17413,22 +17413,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -17473,23 +17473,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -17534,23 +17534,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -17593,22 +17593,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -17653,23 +17653,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -17712,22 +17712,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_6_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -17764,19 +17764,19 @@ define void @s_shuffle_v4bf16_v4bf16__u_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__u_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__u_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -17812,19 +17812,19 @@ define void @s_shuffle_v4bf16_v4bf16__0_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__0_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -17862,20 +17862,20 @@ define void @s_shuffle_v4bf16_v4bf16__1_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -17911,19 +17911,19 @@ define void @s_shuffle_v4bf16_v4bf16__2_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__2_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -17959,19 +17959,19 @@ define void @s_shuffle_v4bf16_v4bf16__3_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__3_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__3_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -18007,19 +18007,19 @@ define void @s_shuffle_v4bf16_v4bf16__4_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__4_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__4_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -18063,23 +18063,23 @@ define void @s_shuffle_v4bf16_v4bf16__5_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__5_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__5_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -18122,22 +18122,22 @@ define void @s_shuffle_v4bf16_v4bf16__6_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__6_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__6_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -18182,23 +18182,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -18241,22 +18241,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_u_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_lshr_b32 s8, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_lshr_b32 s8, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -18301,23 +18301,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -18364,24 +18364,24 @@ define void @s_shuffle_v4bf16_v4bf16__7_1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -18426,23 +18426,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -18487,23 +18487,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_4_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -18550,24 +18550,24 @@ define void @s_shuffle_v4bf16_v4bf16__7_5_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -18612,23 +18612,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_6_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_6_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_6_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -18673,23 +18673,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -18730,21 +18730,21 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -18789,23 +18789,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -18852,24 +18852,24 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -18910,21 +18910,21 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -18969,23 +18969,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s0 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s0 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -19032,24 +19032,24 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -19094,23 +19094,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_6_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s0 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s0 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -19155,17 +19155,17 @@ define void @s_shuffle_v4bf16_v4bf16__0_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__0_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__0_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -19197,17 +19197,17 @@ define void @s_shuffle_v4bf16_v4bf16__1_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__1_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__1_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -19239,17 +19239,17 @@ define void @s_shuffle_v4bf16_v4bf16__2_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__2_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__2_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -19281,17 +19281,17 @@ define void @s_shuffle_v4bf16_v4bf16__3_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__3_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__3_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x bfloat> %shuf) @@ -19341,19 +19341,19 @@ define void @s_shuffle_v4bf16_v4bf16__5_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__5_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__5_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -19388,18 +19388,18 @@ define void @s_shuffle_v4bf16_v4bf16__6_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__6_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__6_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -19436,19 +19436,19 @@ define void @s_shuffle_v4bf16_v4bf16__7_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -19483,18 +19483,18 @@ define void @s_shuffle_v4bf16_v4bf16__7_u_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -19537,22 +19537,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_0_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -19597,23 +19597,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_1_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -19656,22 +19656,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_2_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -19716,23 +19716,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_3_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -19771,20 +19771,20 @@ define void @s_shuffle_v4bf16_v4bf16__7_5_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -19821,19 +19821,19 @@ define void @s_shuffle_v4bf16_v4bf16__7_6_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_6_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_6_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -19870,19 +19870,19 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -19919,19 +19919,19 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_lshl_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_lshl_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -19974,22 +19974,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s2 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s2 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -20034,23 +20034,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s2 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s2 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -20093,22 +20093,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -20153,23 +20153,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s2 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s2 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -20208,20 +20208,20 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s0 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s0 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -20258,19 +20258,19 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_6_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -20305,18 +20305,18 @@ define void @s_shuffle_v4bf16_v4bf16__u_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__u_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__u_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -20359,22 +20359,22 @@ define void @s_shuffle_v4bf16_v4bf16__0_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__0_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__0_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -20419,23 +20419,23 @@ define void @s_shuffle_v4bf16_v4bf16__1_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__1_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__1_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -20478,22 +20478,22 @@ define void @s_shuffle_v4bf16_v4bf16__2_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__2_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__2_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -20538,23 +20538,23 @@ define void @s_shuffle_v4bf16_v4bf16__3_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__3_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__3_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -20589,18 +20589,18 @@ define void @s_shuffle_v4bf16_v4bf16__4_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__4_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__4_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -20637,19 +20637,19 @@ define void @s_shuffle_v4bf16_v4bf16__5_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__5_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__5_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -20686,19 +20686,19 @@ define void @s_shuffle_v4bf16_v4bf16__6_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__6_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__6_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -20737,20 +20737,20 @@ define void @s_shuffle_v4bf16_v4bf16__7_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -20787,19 +20787,19 @@ define void @s_shuffle_v4bf16_v4bf16__7_u_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -20844,23 +20844,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_0_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -20907,24 +20907,24 @@ define void @s_shuffle_v4bf16_v4bf16__7_1_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -20969,23 +20969,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_2_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -21032,24 +21032,24 @@ define void @s_shuffle_v4bf16_v4bf16__7_3_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -21088,20 +21088,20 @@ define void @s_shuffle_v4bf16_v4bf16__7_4_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -21140,20 +21140,20 @@ define void @s_shuffle_v4bf16_v4bf16__7_6_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_6_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s1, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_6_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s1, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -21192,20 +21192,20 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -21242,19 +21242,19 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -21299,23 +21299,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -21362,24 +21362,24 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -21424,23 +21424,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -21487,24 +21487,24 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -21541,19 +21541,19 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -21592,20 +21592,20 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_6_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -21640,18 +21640,18 @@ define void @s_shuffle_v4bf16_v4bf16__u_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__u_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__u_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -21692,21 +21692,21 @@ define void @s_shuffle_v4bf16_v4bf16__0_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__0_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__0_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -21749,22 +21749,22 @@ define void @s_shuffle_v4bf16_v4bf16__1_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__1_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__1_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -21805,21 +21805,21 @@ define void @s_shuffle_v4bf16_v4bf16__2_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__2_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__2_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -21862,22 +21862,22 @@ define void @s_shuffle_v4bf16_v4bf16__3_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__3_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__3_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -21912,18 +21912,18 @@ define void @s_shuffle_v4bf16_v4bf16__4_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__4_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__4_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -21960,19 +21960,19 @@ define void @s_shuffle_v4bf16_v4bf16__5_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__5_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__5_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -22007,18 +22007,18 @@ define void @s_shuffle_v4bf16_v4bf16__6_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__6_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__6_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -22055,19 +22055,19 @@ define void @s_shuffle_v4bf16_v4bf16__7_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -22102,18 +22102,18 @@ define void @s_shuffle_v4bf16_v4bf16__7_u_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_u_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_u_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -22156,22 +22156,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_0_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_0_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_0_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -22216,23 +22216,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_1_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_1_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_1_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -22275,22 +22275,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_2_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_2_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_2_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -22335,23 +22335,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_3_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_3_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_3_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -22388,19 +22388,19 @@ define void @s_shuffle_v4bf16_v4bf16__7_4_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_4_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_4_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -22439,20 +22439,20 @@ define void @s_shuffle_v4bf16_v4bf16__7_5_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_5_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s2, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_5_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s2, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -22489,19 +22489,19 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -22538,19 +22538,19 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_u_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_lshl_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_lshl_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -22593,22 +22593,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_0_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s3 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s3 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -22653,23 +22653,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_1_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s3 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s3 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -22712,22 +22712,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_2_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -22772,23 +22772,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_3_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s3 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s3 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -22825,19 +22825,19 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_4_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -22876,20 +22876,20 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_5_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -22926,19 +22926,19 @@ define void @s_shuffle_v4bf16_v4bf16__u_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__u_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__u_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -22981,22 +22981,22 @@ define void @s_shuffle_v4bf16_v4bf16__0_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__0_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__0_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -23041,23 +23041,23 @@ define void @s_shuffle_v4bf16_v4bf16__1_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__1_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__1_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -23100,22 +23100,22 @@ define void @s_shuffle_v4bf16_v4bf16__2_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__2_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__2_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -23160,23 +23160,23 @@ define void @s_shuffle_v4bf16_v4bf16__3_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__3_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__3_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -23213,19 +23213,19 @@ define void @s_shuffle_v4bf16_v4bf16__4_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__4_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__4_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -23264,20 +23264,20 @@ define void @s_shuffle_v4bf16_v4bf16__5_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__5_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__5_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -23314,19 +23314,19 @@ define void @s_shuffle_v4bf16_v4bf16__6_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__6_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__6_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -23361,18 +23361,18 @@ define void @s_shuffle_v4bf16_v4bf16__7_u_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_u_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s8, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_u_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s8, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -23415,22 +23415,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_0_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_0_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_0_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -23475,23 +23475,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_1_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_1_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_1_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -23534,22 +23534,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_2_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_2_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_2_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -23594,23 +23594,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_3_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_3_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_3_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -23647,19 +23647,19 @@ define void @s_shuffle_v4bf16_v4bf16__7_4_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_4_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_4_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -23698,20 +23698,20 @@ define void @s_shuffle_v4bf16_v4bf16__7_5_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_5_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_5_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -23748,19 +23748,19 @@ define void @s_shuffle_v4bf16_v4bf16__7_6_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_6_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_6_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -23795,18 +23795,18 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_u_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -23849,22 +23849,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_0_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -23909,23 +23909,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_1_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -23968,22 +23968,22 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_2_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -24028,23 +24028,23 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_3_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -24081,19 +24081,19 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_4_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -24132,20 +24132,20 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_5_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> @@ -24180,18 +24180,18 @@ define void @s_shuffle_v4bf16_v4bf16__7_7_6_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4bf16_v4bf16__7_7_6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4bf16_v4bf16__7_7_6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x bfloat> asm "; def $0", "=s"() %vec1 = call <4 x bfloat> asm "; def $0", "=s"() %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <4 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4f16.v2f16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4f16.v2f16.ll index 101ddffaf05fa..c7a6f4359494a 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4f16.v2f16.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4f16.v2f16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v4f16_v2f16__u_u_u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v4f16_v2f16__0_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -79,17 +79,17 @@ define void @v_shuffle_v4f16_v2f16__1_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -132,17 +132,17 @@ define void @v_shuffle_v4f16_v2f16__3_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -181,21 +181,21 @@ define void @v_shuffle_v4f16_v2f16__3_0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v2, 16 -; GFX940-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v2, 16 +; GFX942-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -236,22 +236,22 @@ define void @v_shuffle_v4f16_v2f16__3_1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -284,17 +284,17 @@ define void @v_shuffle_v4f16_v2f16__3_2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -329,18 +329,18 @@ define void @v_shuffle_v4f16_v2f16__3_3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -381,21 +381,21 @@ define void @v_shuffle_v4f16_v2f16__3_3_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_3_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -438,22 +438,22 @@ define void @v_shuffle_v4f16_v2f16__3_3_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_alignbit_b32 v1, s0, v0, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_3_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_alignbit_b32 v1, s0, v0, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -488,19 +488,19 @@ define void @v_shuffle_v4f16_v2f16__3_3_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_3_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -537,19 +537,19 @@ define void @v_shuffle_v4f16_v2f16__3_3_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v1, s0, v0, 16 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_3_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v1, s0, v0, 16 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -592,23 +592,23 @@ define void @v_shuffle_v4f16_v2f16__3_3_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_3_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -651,23 +651,23 @@ define void @v_shuffle_v4f16_v2f16__3_3_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_3_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v3, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -704,19 +704,19 @@ define void @v_shuffle_v4f16_v2f16__3_3_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_3_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -753,19 +753,19 @@ define void @v_shuffle_v4f16_v2f16__3_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -802,19 +802,19 @@ define void @v_shuffle_v4f16_v2f16__u_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -850,19 +850,19 @@ define void @v_shuffle_v4f16_v2f16__0_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__0_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> zeroinitializer store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -898,19 +898,19 @@ define void @v_shuffle_v4f16_v2f16__1_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -946,19 +946,19 @@ define void @v_shuffle_v4f16_v2f16__2_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1000,22 +1000,22 @@ define void @v_shuffle_v4f16_v2f16__3_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -1058,22 +1058,22 @@ define void @v_shuffle_v4f16_v2f16__3_u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -1118,23 +1118,23 @@ define void @v_shuffle_v4f16_v2f16__3_1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -1177,22 +1177,22 @@ define void @v_shuffle_v4f16_v2f16__3_2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -1237,23 +1237,23 @@ define void @v_shuffle_v4f16_v2f16__3_3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -1296,22 +1296,22 @@ define void @v_shuffle_v4f16_v2f16__3_3_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_3_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -1354,22 +1354,22 @@ define void @v_shuffle_v4f16_v2f16__3_3_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_3_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -1414,23 +1414,23 @@ define void @v_shuffle_v4f16_v2f16__3_3_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_3_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -1465,19 +1465,19 @@ define void @v_shuffle_v4f16_v2f16__u_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__u_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__u_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1511,19 +1511,19 @@ define void @v_shuffle_v4f16_v2f16__0_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__0_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__0_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1559,19 +1559,19 @@ define void @v_shuffle_v4f16_v2f16__1_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__1_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__1_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1605,19 +1605,19 @@ define void @v_shuffle_v4f16_v2f16__2_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__2_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__2_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1659,22 +1659,22 @@ define void @v_shuffle_v4f16_v2f16__3_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -1717,22 +1717,22 @@ define void @v_shuffle_v4f16_v2f16__3_u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -1775,22 +1775,22 @@ define void @v_shuffle_v4f16_v2f16__3_0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -1833,22 +1833,22 @@ define void @v_shuffle_v4f16_v2f16__3_2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -1891,22 +1891,22 @@ define void @v_shuffle_v4f16_v2f16__3_3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -1947,21 +1947,21 @@ define void @v_shuffle_v4f16_v2f16__3_3_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_3_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -2002,21 +2002,21 @@ define void @v_shuffle_v4f16_v2f16__3_3_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_3_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -2061,23 +2061,23 @@ define void @v_shuffle_v4f16_v2f16__3_3_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_3_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v3, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -2119,16 +2119,16 @@ define void @v_shuffle_v4f16_v2f16__0_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__0_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2160,17 +2160,17 @@ define void @v_shuffle_v4f16_v2f16__1_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__1_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2217,19 +2217,19 @@ define void @v_shuffle_v4f16_v2f16__3_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -2266,19 +2266,19 @@ define void @v_shuffle_v4f16_v2f16__3_u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -2321,23 +2321,23 @@ define void @v_shuffle_v4f16_v2f16__3_0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -2382,23 +2382,23 @@ define void @v_shuffle_v4f16_v2f16__3_1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -2437,20 +2437,20 @@ define void @v_shuffle_v4f16_v2f16__3_3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -2487,19 +2487,19 @@ define void @v_shuffle_v4f16_v2f16__3_3_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_3_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -2544,23 +2544,23 @@ define void @v_shuffle_v4f16_v2f16__3_3_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v0, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_3_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v0, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -2603,23 +2603,23 @@ define void @v_shuffle_v4f16_v2f16__3_3_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v1, v3, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_3_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v1, v3, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -2654,19 +2654,19 @@ define void @v_shuffle_v4f16_v2f16__u_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__u_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__u_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -2711,23 +2711,23 @@ define void @v_shuffle_v4f16_v2f16__0_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__0_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v1 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -2770,22 +2770,22 @@ define void @v_shuffle_v4f16_v2f16__1_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -2820,19 +2820,19 @@ define void @v_shuffle_v4f16_v2f16__2_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__2_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -2869,19 +2869,19 @@ define void @v_shuffle_v4f16_v2f16__3_u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -2924,23 +2924,23 @@ define void @v_shuffle_v4f16_v2f16__3_0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -2983,22 +2983,22 @@ define void @v_shuffle_v4f16_v2f16__3_1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -3035,19 +3035,19 @@ define void @v_shuffle_v4f16_v2f16__3_2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -3082,19 +3082,19 @@ define void @v_shuffle_v4f16_v2f16__3_3_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_3_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -3139,23 +3139,23 @@ define void @v_shuffle_v4f16_v2f16__3_3_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v0, v3 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_3_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v0, v3 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -3198,23 +3198,23 @@ define void @v_shuffle_v4f16_v2f16__3_3_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v0, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_3_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v0, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -3249,19 +3249,19 @@ define void @v_shuffle_v4f16_v2f16__3_3_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v2f16__3_3_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v2f16__3_3_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=v"() %vec1 = call <2 x half> asm "; def $0", "=v"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -3306,17 +3306,17 @@ define void @s_shuffle_v4f16_v2f16__0_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -3348,17 +3348,17 @@ define void @s_shuffle_v4f16_v2f16__1_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -3404,17 +3404,17 @@ define void @s_shuffle_v4f16_v2f16__3_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -3455,21 +3455,21 @@ define void @s_shuffle_v4f16_v2f16__3_0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -3512,22 +3512,22 @@ define void @s_shuffle_v4f16_v2f16__3_1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -3562,18 +3562,18 @@ define void @s_shuffle_v4f16_v2f16__3_2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -3608,18 +3608,18 @@ define void @s_shuffle_v4f16_v2f16__3_3_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -3660,21 +3660,21 @@ define void @s_shuffle_v4f16_v2f16__3_3_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_3_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -3717,22 +3717,22 @@ define void @s_shuffle_v4f16_v2f16__3_3_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_3_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -3767,18 +3767,18 @@ define void @s_shuffle_v4f16_v2f16__3_3_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_3_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -3813,18 +3813,18 @@ define void @s_shuffle_v4f16_v2f16__3_3_3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_3_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -3867,22 +3867,22 @@ define void @s_shuffle_v4f16_v2f16__3_3_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_3_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -3927,23 +3927,23 @@ define void @s_shuffle_v4f16_v2f16__3_3_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_3_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -3980,19 +3980,19 @@ define void @s_shuffle_v4f16_v2f16__3_3_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_3_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -4029,19 +4029,19 @@ define void @s_shuffle_v4f16_v2f16__3_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -4076,18 +4076,18 @@ define void @s_shuffle_v4f16_v2f16__u_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -4121,18 +4121,18 @@ define void @s_shuffle_v4f16_v2f16__0_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__0_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> zeroinitializer call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -4168,19 +4168,19 @@ define void @s_shuffle_v4f16_v2f16__1_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -4214,18 +4214,18 @@ define void @s_shuffle_v4f16_v2f16__2_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -4267,22 +4267,22 @@ define void @s_shuffle_v4f16_v2f16__3_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -4323,21 +4323,21 @@ define void @s_shuffle_v4f16_v2f16__3_u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -4382,23 +4382,23 @@ define void @s_shuffle_v4f16_v2f16__3_1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -4441,22 +4441,22 @@ define void @s_shuffle_v4f16_v2f16__3_2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -4499,22 +4499,22 @@ define void @s_shuffle_v4f16_v2f16__3_3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -4557,22 +4557,22 @@ define void @s_shuffle_v4f16_v2f16__3_3_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_lshl_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_3_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_lshl_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -4617,23 +4617,23 @@ define void @s_shuffle_v4f16_v2f16__3_3_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s0 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_3_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s0 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -4676,22 +4676,22 @@ define void @s_shuffle_v4f16_v2f16__3_3_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_3_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -4726,18 +4726,18 @@ define void @s_shuffle_v4f16_v2f16__u_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__u_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__u_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -4771,18 +4771,18 @@ define void @s_shuffle_v4f16_v2f16__0_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__0_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__0_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -4818,19 +4818,19 @@ define void @s_shuffle_v4f16_v2f16__1_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__1_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__1_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -4864,18 +4864,18 @@ define void @s_shuffle_v4f16_v2f16__2_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__2_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__2_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -4919,23 +4919,23 @@ define void @s_shuffle_v4f16_v2f16__3_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -4978,22 +4978,22 @@ define void @s_shuffle_v4f16_v2f16__3_u_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -5038,23 +5038,23 @@ define void @s_shuffle_v4f16_v2f16__3_0_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -5099,23 +5099,23 @@ define void @s_shuffle_v4f16_v2f16__3_2_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s1, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s1, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -5160,23 +5160,23 @@ define void @s_shuffle_v4f16_v2f16__3_3_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -5217,21 +5217,21 @@ define void @s_shuffle_v4f16_v2f16__3_3_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_3_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -5272,21 +5272,21 @@ define void @s_shuffle_v4f16_v2f16__3_3_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_3_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -5331,23 +5331,23 @@ define void @s_shuffle_v4f16_v2f16__3_3_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_3_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -5392,17 +5392,17 @@ define void @s_shuffle_v4f16_v2f16__0_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__0_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -5434,17 +5434,17 @@ define void @s_shuffle_v4f16_v2f16__1_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__1_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -5494,19 +5494,19 @@ define void @s_shuffle_v4f16_v2f16__3_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -5541,18 +5541,18 @@ define void @s_shuffle_v4f16_v2f16__3_u_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -5595,22 +5595,22 @@ define void @s_shuffle_v4f16_v2f16__3_0_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -5655,23 +5655,23 @@ define void @s_shuffle_v4f16_v2f16__3_1_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s2, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s2, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -5708,19 +5708,19 @@ define void @s_shuffle_v4f16_v2f16__3_3_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -5757,19 +5757,19 @@ define void @s_shuffle_v4f16_v2f16__3_3_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_lshl_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_3_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_lshl_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -5812,22 +5812,22 @@ define void @s_shuffle_v4f16_v2f16__3_3_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_3_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -5872,23 +5872,23 @@ define void @s_shuffle_v4f16_v2f16__3_3_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_3_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -5923,18 +5923,18 @@ define void @s_shuffle_v4f16_v2f16__u_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__u_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__u_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -5977,22 +5977,22 @@ define void @s_shuffle_v4f16_v2f16__0_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__0_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -6037,23 +6037,23 @@ define void @s_shuffle_v4f16_v2f16__1_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -6088,18 +6088,18 @@ define void @s_shuffle_v4f16_v2f16__2_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__2_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -6134,18 +6134,18 @@ define void @s_shuffle_v4f16_v2f16__3_u_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s8, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s8, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -6188,22 +6188,22 @@ define void @s_shuffle_v4f16_v2f16__3_0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -6248,23 +6248,23 @@ define void @s_shuffle_v4f16_v2f16__3_1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -6301,19 +6301,19 @@ define void @s_shuffle_v4f16_v2f16__3_2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -6348,18 +6348,18 @@ define void @s_shuffle_v4f16_v2f16__3_3_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_3_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -6402,22 +6402,22 @@ define void @s_shuffle_v4f16_v2f16__3_3_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_3_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -6462,23 +6462,23 @@ define void @s_shuffle_v4f16_v2f16__3_3_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_3_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> @@ -6513,18 +6513,18 @@ define void @s_shuffle_v4f16_v2f16__3_3_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v2f16__3_3_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v2f16__3_3_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x half> asm "; def $0", "=s"() %vec1 = call <2 x half> asm "; def $0", "=s"() %shuf = shufflevector <2 x half> %vec0, <2 x half> %vec1, <4 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4f16.v3f16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4f16.v3f16.ll index 37f4c293c4692..e91433ac4c1f7 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4f16.v3f16.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4f16.v3f16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v4f16_v3f16__u_u_u_u(ptr addrspace(1) inreg %ptr) { @@ -39,16 +39,16 @@ define void @v_shuffle_v4f16_v3f16__0_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -81,17 +81,17 @@ define void @v_shuffle_v4f16_v3f16__1_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -124,17 +124,17 @@ define void @v_shuffle_v4f16_v3f16__2_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__2_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__2_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -179,17 +179,17 @@ define void @v_shuffle_v4f16_v3f16__4_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__4_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__4_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -224,17 +224,17 @@ define void @v_shuffle_v4f16_v3f16__5_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -277,22 +277,22 @@ define void @v_shuffle_v4f16_v3f16__5_0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -335,22 +335,22 @@ define void @v_shuffle_v4f16_v3f16__5_1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -393,22 +393,22 @@ define void @v_shuffle_v4f16_v3f16__5_2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -445,18 +445,18 @@ define void @v_shuffle_v4f16_v3f16__5_3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -493,18 +493,18 @@ define void @v_shuffle_v4f16_v3f16__5_4_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -541,18 +541,18 @@ define void @v_shuffle_v4f16_v3f16__5_5_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -597,22 +597,22 @@ define void @v_shuffle_v4f16_v3f16__5_5_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v2, v3, v3, s2 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v2, v3, v3, s2 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -657,22 +657,22 @@ define void @v_shuffle_v4f16_v3f16__5_5_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_alignbit_b32 v1, s0, v0, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_alignbit_b32 v1, s0, v0, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -715,22 +715,22 @@ define void @v_shuffle_v4f16_v3f16__5_5_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -769,20 +769,20 @@ define void @v_shuffle_v4f16_v3f16__5_5_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -821,20 +821,20 @@ define void @v_shuffle_v4f16_v3f16__5_5_4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v3, s0, v0, 16 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v3, s0, v0, 16 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -871,18 +871,18 @@ define void @v_shuffle_v4f16_v3f16__5_5_5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -927,23 +927,23 @@ define void @v_shuffle_v4f16_v3f16__5_5_5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v3, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -990,23 +990,23 @@ define void @v_shuffle_v4f16_v3f16__5_5_5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v3, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -1051,23 +1051,23 @@ define void @v_shuffle_v4f16_v3f16__5_5_5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v3, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v3, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -1106,20 +1106,20 @@ define void @v_shuffle_v4f16_v3f16__5_5_5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -1160,20 +1160,20 @@ define void @v_shuffle_v4f16_v3f16__5_5_5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v3, s2, v1, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v3, s2, v1, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -1212,19 +1212,19 @@ define void @v_shuffle_v4f16_v3f16__5_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -1263,19 +1263,19 @@ define void @v_shuffle_v4f16_v3f16__u_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -1312,19 +1312,19 @@ define void @v_shuffle_v4f16_v3f16__0_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__0_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> zeroinitializer @@ -1361,19 +1361,19 @@ define void @v_shuffle_v4f16_v3f16__1_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -1410,20 +1410,20 @@ define void @v_shuffle_v4f16_v3f16__2_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -1460,19 +1460,19 @@ define void @v_shuffle_v4f16_v3f16__3_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -1515,22 +1515,22 @@ define void @v_shuffle_v4f16_v3f16__4_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__4_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v2, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__4_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v2, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -1575,23 +1575,23 @@ define void @v_shuffle_v4f16_v3f16__5_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v3, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v3, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -1636,22 +1636,22 @@ define void @v_shuffle_v4f16_v3f16__5_u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -1698,23 +1698,23 @@ define void @v_shuffle_v4f16_v3f16__5_1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v2, s2, v3, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v2, s2, v3, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -1759,23 +1759,23 @@ define void @v_shuffle_v4f16_v3f16__5_2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v3, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v3, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -1820,22 +1820,22 @@ define void @v_shuffle_v4f16_v3f16__5_3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v2, v2, v3, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v2, v2, v3, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -1882,24 +1882,24 @@ define void @v_shuffle_v4f16_v3f16__5_4_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_bfi_b32 v2, s2, v3, v2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_bfi_b32 v2, s2, v3, v2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -1944,22 +1944,22 @@ define void @v_shuffle_v4f16_v3f16__5_5_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -2004,22 +2004,22 @@ define void @v_shuffle_v4f16_v3f16__5_5_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v2, v3, v3, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v2, v3, v3, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -2064,22 +2064,22 @@ define void @v_shuffle_v4f16_v3f16__5_5_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_alignbit_b32 v1, v0, v0, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_alignbit_b32 v1, v0, v0, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -2124,22 +2124,22 @@ define void @v_shuffle_v4f16_v3f16__5_5_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -2184,23 +2184,23 @@ define void @v_shuffle_v4f16_v3f16__5_5_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v2, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v2, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -2245,23 +2245,23 @@ define void @v_shuffle_v4f16_v3f16__5_5_4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v2, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v2, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -2298,18 +2298,18 @@ define void @v_shuffle_v4f16_v3f16__u_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__u_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__u_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -2344,18 +2344,18 @@ define void @v_shuffle_v4f16_v3f16__0_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__0_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__0_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -2392,19 +2392,19 @@ define void @v_shuffle_v4f16_v3f16__1_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__1_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__1_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -2443,20 +2443,20 @@ define void @v_shuffle_v4f16_v3f16__2_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__2_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v2, s2, v1, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__2_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v2, s2, v1, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -2491,18 +2491,18 @@ define void @v_shuffle_v4f16_v3f16__3_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__3_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -2545,23 +2545,23 @@ define void @v_shuffle_v4f16_v3f16__4_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__4_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v2, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__4_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v2, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -2608,23 +2608,23 @@ define void @v_shuffle_v4f16_v3f16__5_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v2, s2, v3, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v2, s2, v3, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -2669,22 +2669,22 @@ define void @v_shuffle_v4f16_v3f16__5_u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -2731,23 +2731,23 @@ define void @v_shuffle_v4f16_v3f16__5_0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -2794,23 +2794,23 @@ define void @v_shuffle_v4f16_v3f16__5_2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -2857,24 +2857,24 @@ define void @v_shuffle_v4f16_v3f16__5_3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v2, v2, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v2, v2, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -2921,24 +2921,24 @@ define void @v_shuffle_v4f16_v3f16__5_4_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_bfi_b32 v2, s2, v3, v2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_bfi_b32 v2, s2, v3, v2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -2985,24 +2985,24 @@ define void @v_shuffle_v4f16_v3f16__5_5_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -3047,22 +3047,22 @@ define void @v_shuffle_v4f16_v3f16__5_5_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v2, v3, v3, s2 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v2, v3, v3, s2 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -3107,22 +3107,22 @@ define void @v_shuffle_v4f16_v3f16__5_5_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v2, v3, v3, s2 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v2, v3, v3, s2 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -3169,24 +3169,24 @@ define void @v_shuffle_v4f16_v3f16__5_5_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_bfi_b32 v1, s2, v1, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -3233,23 +3233,23 @@ define void @v_shuffle_v4f16_v3f16__5_5_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v2, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v2, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -3296,23 +3296,23 @@ define void @v_shuffle_v4f16_v3f16__5_5_4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v2, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v2, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -3351,20 +3351,20 @@ define void @v_shuffle_v4f16_v3f16__u_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__u_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__u_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -3401,19 +3401,19 @@ define void @v_shuffle_v4f16_v3f16__0_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__0_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -3450,20 +3450,20 @@ define void @v_shuffle_v4f16_v3f16__1_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__1_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -3500,19 +3500,19 @@ define void @v_shuffle_v4f16_v3f16__2_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__2_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__2_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -3549,20 +3549,20 @@ define void @v_shuffle_v4f16_v3f16__3_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__3_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__3_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -3605,23 +3605,23 @@ define void @v_shuffle_v4f16_v3f16__4_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__4_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v2, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__4_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v2, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -3666,23 +3666,23 @@ define void @v_shuffle_v4f16_v3f16__5_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -3727,22 +3727,22 @@ define void @v_shuffle_v4f16_v3f16__5_u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -3787,22 +3787,22 @@ define void @v_shuffle_v4f16_v3f16__5_0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -3849,23 +3849,23 @@ define void @v_shuffle_v4f16_v3f16__5_1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -3910,22 +3910,22 @@ define void @v_shuffle_v4f16_v3f16__5_3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_perm_b32 v0, v2, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -3972,23 +3972,23 @@ define void @v_shuffle_v4f16_v3f16__5_4_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4033,22 +4033,22 @@ define void @v_shuffle_v4f16_v3f16__5_5_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4093,22 +4093,22 @@ define void @v_shuffle_v4f16_v3f16__5_5_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4153,22 +4153,22 @@ define void @v_shuffle_v4f16_v3f16__5_5_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v0, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v0, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4213,22 +4213,22 @@ define void @v_shuffle_v4f16_v3f16__5_5_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_alignbit_b32 v1, v1, v0, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_alignbit_b32 v1, v1, v0, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4273,23 +4273,23 @@ define void @v_shuffle_v4f16_v3f16__5_5_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v2, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v2, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4334,23 +4334,23 @@ define void @v_shuffle_v4f16_v3f16__5_5_4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v2, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v2, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4395,16 +4395,16 @@ define void @v_shuffle_v4f16_v3f16__0_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__0_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -4437,17 +4437,17 @@ define void @v_shuffle_v4f16_v3f16__1_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -4480,17 +4480,17 @@ define void @v_shuffle_v4f16_v3f16__2_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__2_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -4539,19 +4539,19 @@ define void @v_shuffle_v4f16_v3f16__4_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__4_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__4_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4590,20 +4590,20 @@ define void @v_shuffle_v4f16_v3f16__5_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4641,20 +4641,20 @@ define void @v_shuffle_v4f16_v3f16__5_u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4699,23 +4699,23 @@ define void @v_shuffle_v4f16_v3f16__5_0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4762,23 +4762,23 @@ define void @v_shuffle_v4f16_v3f16__5_1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4823,23 +4823,23 @@ define void @v_shuffle_v4f16_v3f16__5_2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4880,20 +4880,20 @@ define void @v_shuffle_v4f16_v3f16__5_4_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v2, s2, v1, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v2, s2, v1, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4932,20 +4932,20 @@ define void @v_shuffle_v4f16_v3f16__5_5_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -4984,20 +4984,20 @@ define void @v_shuffle_v4f16_v3f16__5_5_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -5042,23 +5042,23 @@ define void @v_shuffle_v4f16_v3f16__5_5_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v0, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v0, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -5103,23 +5103,23 @@ define void @v_shuffle_v4f16_v3f16__5_5_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v2, v0, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v2, v0, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -5164,23 +5164,23 @@ define void @v_shuffle_v4f16_v3f16__5_5_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v1, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v1, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -5219,20 +5219,20 @@ define void @v_shuffle_v4f16_v3f16__5_5_4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v3, v0, v0, 16 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v3, v0, v0, 16 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -5269,18 +5269,18 @@ define void @v_shuffle_v4f16_v3f16__u_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__u_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__u_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -5327,23 +5327,23 @@ define void @v_shuffle_v4f16_v3f16__0_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__0_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__0_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -5388,23 +5388,23 @@ define void @v_shuffle_v4f16_v3f16__1_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__1_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__1_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -5451,23 +5451,23 @@ define void @v_shuffle_v4f16_v3f16__2_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__2_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__2_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -5504,18 +5504,18 @@ define void @v_shuffle_v4f16_v3f16__3_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__3_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__3_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -5554,19 +5554,19 @@ define void @v_shuffle_v4f16_v3f16__4_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__4_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__4_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -5607,20 +5607,20 @@ define void @v_shuffle_v4f16_v3f16__5_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v2, s2, v1, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v2, s2, v1, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -5658,20 +5658,20 @@ define void @v_shuffle_v4f16_v3f16__5_u_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -5718,23 +5718,23 @@ define void @v_shuffle_v4f16_v3f16__5_0_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -5781,23 +5781,23 @@ define void @v_shuffle_v4f16_v3f16__5_1_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -5844,23 +5844,23 @@ define void @v_shuffle_v4f16_v3f16__5_2_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -5901,20 +5901,20 @@ define void @v_shuffle_v4f16_v3f16__5_3_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v1, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v1, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -5955,20 +5955,20 @@ define void @v_shuffle_v4f16_v3f16__5_5_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -6007,20 +6007,20 @@ define void @v_shuffle_v4f16_v3f16__5_5_u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -6067,23 +6067,23 @@ define void @v_shuffle_v4f16_v3f16__5_5_0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v0, v2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v0, v2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -6130,23 +6130,23 @@ define void @v_shuffle_v4f16_v3f16__5_5_1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v0, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v0, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -6193,23 +6193,23 @@ define void @v_shuffle_v4f16_v3f16__5_5_2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v1, v2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -6248,20 +6248,20 @@ define void @v_shuffle_v4f16_v3f16__5_5_3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -6300,20 +6300,20 @@ define void @v_shuffle_v4f16_v3f16__u_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__u_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__u_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -6358,23 +6358,23 @@ define void @v_shuffle_v4f16_v3f16__0_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__0_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__0_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -6419,23 +6419,23 @@ define void @v_shuffle_v4f16_v3f16__1_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__1_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__1_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -6480,23 +6480,23 @@ define void @v_shuffle_v4f16_v3f16__2_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__2_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__2_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -6535,19 +6535,19 @@ define void @v_shuffle_v4f16_v3f16__3_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__3_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__3_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -6586,20 +6586,20 @@ define void @v_shuffle_v4f16_v3f16__4_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__4_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__4_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -6637,20 +6637,20 @@ define void @v_shuffle_v4f16_v3f16__5_u_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -6695,23 +6695,23 @@ define void @v_shuffle_v4f16_v3f16__5_0_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -6758,23 +6758,23 @@ define void @v_shuffle_v4f16_v3f16__5_1_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -6819,23 +6819,23 @@ define void @v_shuffle_v4f16_v3f16__5_2_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -6874,19 +6874,19 @@ define void @v_shuffle_v4f16_v3f16__5_3_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -6927,20 +6927,20 @@ define void @v_shuffle_v4f16_v3f16__5_4_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -6979,19 +6979,19 @@ define void @v_shuffle_v4f16_v3f16__5_5_u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -7036,23 +7036,23 @@ define void @v_shuffle_v4f16_v3f16__5_5_0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v0, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v0, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -7097,23 +7097,23 @@ define void @v_shuffle_v4f16_v3f16__5_5_1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v3, v0, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v3, v0, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -7158,23 +7158,23 @@ define void @v_shuffle_v4f16_v3f16__5_5_2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v1, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v1, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -7213,20 +7213,20 @@ define void @v_shuffle_v4f16_v3f16__5_5_3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v0, s2 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v0, s2 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -7265,20 +7265,20 @@ define void @v_shuffle_v4f16_v3f16__5_5_4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v3f16__5_5_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v3, v1, v0, 16 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v3f16__5_5_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v3, v1, v0, 16 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -7326,17 +7326,17 @@ define void @s_shuffle_v4f16_v3f16__0_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -7369,17 +7369,17 @@ define void @s_shuffle_v4f16_v3f16__1_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -7412,17 +7412,17 @@ define void @s_shuffle_v4f16_v3f16__2_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__2_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__2_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -7470,17 +7470,17 @@ define void @s_shuffle_v4f16_v3f16__4_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__4_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__4_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -7515,17 +7515,17 @@ define void @s_shuffle_v4f16_v3f16__5_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -7566,20 +7566,20 @@ define void @s_shuffle_v4f16_v3f16__5_0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -7622,21 +7622,21 @@ define void @s_shuffle_v4f16_v3f16__5_1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -7677,20 +7677,20 @@ define void @s_shuffle_v4f16_v3f16__5_2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -7725,17 +7725,17 @@ define void @s_shuffle_v4f16_v3f16__5_3_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -7772,18 +7772,18 @@ define void @s_shuffle_v4f16_v3f16__5_4_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -7818,17 +7818,17 @@ define void @s_shuffle_v4f16_v3f16__5_5_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -7871,21 +7871,21 @@ define void @s_shuffle_v4f16_v3f16__5_5_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -7928,21 +7928,21 @@ define void @s_shuffle_v4f16_v3f16__5_5_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -7983,20 +7983,20 @@ define void @s_shuffle_v4f16_v3f16__5_5_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -8033,18 +8033,18 @@ define void @s_shuffle_v4f16_v3f16__5_5_3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -8081,18 +8081,18 @@ define void @s_shuffle_v4f16_v3f16__5_5_4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -8156,21 +8156,21 @@ define void @s_shuffle_v4f16_v3f16__5_5_5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -8215,22 +8215,22 @@ define void @s_shuffle_v4f16_v3f16__5_5_5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -8273,21 +8273,21 @@ define void @s_shuffle_v4f16_v3f16__5_5_5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -8324,18 +8324,18 @@ define void @s_shuffle_v4f16_v3f16__5_5_5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -8374,19 +8374,19 @@ define void @s_shuffle_v4f16_v3f16__5_5_5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -8423,18 +8423,18 @@ define void @s_shuffle_v4f16_v3f16__5_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -8471,18 +8471,18 @@ define void @s_shuffle_v4f16_v3f16__u_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -8517,18 +8517,18 @@ define void @s_shuffle_v4f16_v3f16__0_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__0_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> zeroinitializer @@ -8565,19 +8565,19 @@ define void @s_shuffle_v4f16_v3f16__1_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -8612,18 +8612,18 @@ define void @s_shuffle_v4f16_v3f16__2_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -8658,18 +8658,18 @@ define void @s_shuffle_v4f16_v3f16__3_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -8712,22 +8712,22 @@ define void @s_shuffle_v4f16_v3f16__4_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__4_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__4_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -8770,21 +8770,21 @@ define void @s_shuffle_v4f16_v3f16__5_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -8827,21 +8827,21 @@ define void @s_shuffle_v4f16_v3f16__5_u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -8886,22 +8886,22 @@ define void @s_shuffle_v4f16_v3f16__5_1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -8944,21 +8944,21 @@ define void @s_shuffle_v4f16_v3f16__5_2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -9001,21 +9001,21 @@ define void @s_shuffle_v4f16_v3f16__5_3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -9060,22 +9060,22 @@ define void @s_shuffle_v4f16_v3f16__5_4_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -9118,21 +9118,21 @@ define void @s_shuffle_v4f16_v3f16__5_5_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -9175,21 +9175,21 @@ define void @s_shuffle_v4f16_v3f16__5_5_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -9234,22 +9234,22 @@ define void @s_shuffle_v4f16_v3f16__5_5_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -9292,21 +9292,21 @@ define void @s_shuffle_v4f16_v3f16__5_5_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -9349,21 +9349,21 @@ define void @s_shuffle_v4f16_v3f16__5_5_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -9408,22 +9408,22 @@ define void @s_shuffle_v4f16_v3f16__5_5_4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -9460,18 +9460,18 @@ define void @s_shuffle_v4f16_v3f16__u_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__u_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__u_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -9506,18 +9506,18 @@ define void @s_shuffle_v4f16_v3f16__0_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__0_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__0_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -9554,19 +9554,19 @@ define void @s_shuffle_v4f16_v3f16__1_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__1_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__1_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -9603,19 +9603,19 @@ define void @s_shuffle_v4f16_v3f16__2_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__2_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__2_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -9650,18 +9650,18 @@ define void @s_shuffle_v4f16_v3f16__3_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__3_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -9706,23 +9706,23 @@ define void @s_shuffle_v4f16_v3f16__4_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__4_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__4_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -9767,22 +9767,22 @@ define void @s_shuffle_v4f16_v3f16__5_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -9827,22 +9827,22 @@ define void @s_shuffle_v4f16_v3f16__5_u_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -9887,22 +9887,22 @@ define void @s_shuffle_v4f16_v3f16__5_0_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -9947,22 +9947,22 @@ define void @s_shuffle_v4f16_v3f16__5_2_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -10007,22 +10007,22 @@ define void @s_shuffle_v4f16_v3f16__5_3_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -10069,23 +10069,23 @@ define void @s_shuffle_v4f16_v3f16__5_4_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -10130,22 +10130,22 @@ define void @s_shuffle_v4f16_v3f16__5_5_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -10188,21 +10188,21 @@ define void @s_shuffle_v4f16_v3f16__5_5_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -10245,21 +10245,21 @@ define void @s_shuffle_v4f16_v3f16__5_5_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -10304,22 +10304,22 @@ define void @s_shuffle_v4f16_v3f16__5_5_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -10364,22 +10364,22 @@ define void @s_shuffle_v4f16_v3f16__5_5_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -10426,23 +10426,23 @@ define void @s_shuffle_v4f16_v3f16__5_5_4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -10479,18 +10479,18 @@ define void @s_shuffle_v4f16_v3f16__u_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__u_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__u_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -10525,18 +10525,18 @@ define void @s_shuffle_v4f16_v3f16__0_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__0_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -10573,19 +10573,19 @@ define void @s_shuffle_v4f16_v3f16__1_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__1_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -10620,18 +10620,18 @@ define void @s_shuffle_v4f16_v3f16__2_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__2_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__2_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -10666,18 +10666,18 @@ define void @s_shuffle_v4f16_v3f16__3_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__3_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__3_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -10720,22 +10720,22 @@ define void @s_shuffle_v4f16_v3f16__4_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__4_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__4_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -10778,21 +10778,21 @@ define void @s_shuffle_v4f16_v3f16__5_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -10835,21 +10835,21 @@ define void @s_shuffle_v4f16_v3f16__5_u_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -10892,21 +10892,21 @@ define void @s_shuffle_v4f16_v3f16__5_0_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -10951,22 +10951,22 @@ define void @s_shuffle_v4f16_v3f16__5_1_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -11009,21 +11009,21 @@ define void @s_shuffle_v4f16_v3f16__5_3_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -11068,22 +11068,22 @@ define void @s_shuffle_v4f16_v3f16__5_4_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -11126,21 +11126,21 @@ define void @s_shuffle_v4f16_v3f16__5_5_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -11183,21 +11183,21 @@ define void @s_shuffle_v4f16_v3f16__5_5_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s9, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s9, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -11240,21 +11240,21 @@ define void @s_shuffle_v4f16_v3f16__5_5_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -11299,22 +11299,22 @@ define void @s_shuffle_v4f16_v3f16__5_5_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -11357,21 +11357,21 @@ define void @s_shuffle_v4f16_v3f16__5_5_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -11416,22 +11416,22 @@ define void @s_shuffle_v4f16_v3f16__5_5_4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -11479,17 +11479,17 @@ define void @s_shuffle_v4f16_v3f16__0_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__0_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -11522,17 +11522,17 @@ define void @s_shuffle_v4f16_v3f16__1_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -11565,17 +11565,17 @@ define void @s_shuffle_v4f16_v3f16__2_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__2_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <4 x i32> @@ -11627,19 +11627,19 @@ define void @s_shuffle_v4f16_v3f16__4_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__4_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__4_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -11676,18 +11676,18 @@ define void @s_shuffle_v4f16_v3f16__5_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -11724,18 +11724,18 @@ define void @s_shuffle_v4f16_v3f16__5_u_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -11778,21 +11778,21 @@ define void @s_shuffle_v4f16_v3f16__5_0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -11837,22 +11837,22 @@ define void @s_shuffle_v4f16_v3f16__5_1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -11895,21 +11895,21 @@ define void @s_shuffle_v4f16_v3f16__5_2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -11948,19 +11948,19 @@ define void @s_shuffle_v4f16_v3f16__5_4_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -11997,18 +11997,18 @@ define void @s_shuffle_v4f16_v3f16__5_5_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -12045,18 +12045,18 @@ define void @s_shuffle_v4f16_v3f16__5_5_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -12099,21 +12099,21 @@ define void @s_shuffle_v4f16_v3f16__5_5_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -12158,22 +12158,22 @@ define void @s_shuffle_v4f16_v3f16__5_5_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -12216,21 +12216,21 @@ define void @s_shuffle_v4f16_v3f16__5_5_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -12269,19 +12269,19 @@ define void @s_shuffle_v4f16_v3f16__5_5_4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -12318,18 +12318,18 @@ define void @s_shuffle_v4f16_v3f16__u_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__u_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__u_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -12374,22 +12374,22 @@ define void @s_shuffle_v4f16_v3f16__0_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__0_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__0_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -12436,23 +12436,23 @@ define void @s_shuffle_v4f16_v3f16__1_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__1_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__1_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -12497,22 +12497,22 @@ define void @s_shuffle_v4f16_v3f16__2_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__2_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__2_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -12549,18 +12549,18 @@ define void @s_shuffle_v4f16_v3f16__3_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__3_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__3_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -12599,19 +12599,19 @@ define void @s_shuffle_v4f16_v3f16__4_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__4_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__4_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -12650,19 +12650,19 @@ define void @s_shuffle_v4f16_v3f16__5_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -12701,19 +12701,19 @@ define void @s_shuffle_v4f16_v3f16__5_u_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -12758,22 +12758,22 @@ define void @s_shuffle_v4f16_v3f16__5_0_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -12820,23 +12820,23 @@ define void @s_shuffle_v4f16_v3f16__5_1_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -12881,22 +12881,22 @@ define void @s_shuffle_v4f16_v3f16__5_2_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -12935,19 +12935,19 @@ define void @s_shuffle_v4f16_v3f16__5_3_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -12986,19 +12986,19 @@ define void @s_shuffle_v4f16_v3f16__5_5_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -13035,18 +13035,18 @@ define void @s_shuffle_v4f16_v3f16__5_5_u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -13091,22 +13091,22 @@ define void @s_shuffle_v4f16_v3f16__5_5_0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -13153,23 +13153,23 @@ define void @s_shuffle_v4f16_v3f16__5_5_1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -13214,22 +13214,22 @@ define void @s_shuffle_v4f16_v3f16__5_5_2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -13266,18 +13266,18 @@ define void @s_shuffle_v4f16_v3f16__5_5_3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -13314,18 +13314,18 @@ define void @s_shuffle_v4f16_v3f16__u_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__u_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__u_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -13368,21 +13368,21 @@ define void @s_shuffle_v4f16_v3f16__0_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__0_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__0_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -13427,22 +13427,22 @@ define void @s_shuffle_v4f16_v3f16__1_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__1_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__1_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -13485,21 +13485,21 @@ define void @s_shuffle_v4f16_v3f16__2_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__2_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__2_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -13536,18 +13536,18 @@ define void @s_shuffle_v4f16_v3f16__3_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__3_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__3_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -13586,19 +13586,19 @@ define void @s_shuffle_v4f16_v3f16__4_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__4_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__4_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -13635,18 +13635,18 @@ define void @s_shuffle_v4f16_v3f16__5_u_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -13689,21 +13689,21 @@ define void @s_shuffle_v4f16_v3f16__5_0_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -13748,22 +13748,22 @@ define void @s_shuffle_v4f16_v3f16__5_1_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -13806,21 +13806,21 @@ define void @s_shuffle_v4f16_v3f16__5_2_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -13857,18 +13857,18 @@ define void @s_shuffle_v4f16_v3f16__5_3_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -13907,19 +13907,19 @@ define void @s_shuffle_v4f16_v3f16__5_4_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -13956,18 +13956,18 @@ define void @s_shuffle_v4f16_v3f16__5_5_u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s9, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s9, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -14010,21 +14010,21 @@ define void @s_shuffle_v4f16_v3f16__5_5_0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -14069,22 +14069,22 @@ define void @s_shuffle_v4f16_v3f16__5_5_1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -14127,21 +14127,21 @@ define void @s_shuffle_v4f16_v3f16__5_5_2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -14178,18 +14178,18 @@ define void @s_shuffle_v4f16_v3f16__5_5_3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> @@ -14228,19 +14228,19 @@ define void @s_shuffle_v4f16_v3f16__5_5_4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v3f16__5_5_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v3f16__5_5_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4f16.v4f16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4f16.v4f16.ll index fe84fb3f39e35..47100b9983559 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4f16.v4f16.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4f16.v4f16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v4f16_v4f16__u_u_u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v4f16_v4f16__0_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -79,17 +79,17 @@ define void @v_shuffle_v4f16_v4f16__1_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -121,17 +121,17 @@ define void @v_shuffle_v4f16_v4f16__2_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__2_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__2_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -163,17 +163,17 @@ define void @v_shuffle_v4f16_v4f16__3_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__3_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__3_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -216,17 +216,17 @@ define void @v_shuffle_v4f16_v4f16__5_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__5_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__5_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -259,17 +259,17 @@ define void @v_shuffle_v4f16_v4f16__6_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__6_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__6_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -302,17 +302,17 @@ define void @v_shuffle_v4f16_v4f16__7_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -351,21 +351,21 @@ define void @v_shuffle_v4f16_v4f16__7_0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -406,22 +406,22 @@ define void @v_shuffle_v4f16_v4f16__7_1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -460,21 +460,21 @@ define void @v_shuffle_v4f16_v4f16__7_2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -515,22 +515,22 @@ define void @v_shuffle_v4f16_v4f16__7_3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -563,17 +563,17 @@ define void @v_shuffle_v4f16_v4f16__7_4_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -608,18 +608,18 @@ define void @v_shuffle_v4f16_v4f16__7_5_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -652,17 +652,17 @@ define void @v_shuffle_v4f16_v4f16__7_6_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_6_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_6_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -697,18 +697,18 @@ define void @v_shuffle_v4f16_v4f16__7_7_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -751,22 +751,22 @@ define void @v_shuffle_v4f16_v4f16__7_7_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v2, v3, v3, s2 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v2, v3, v3, s2 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -809,22 +809,22 @@ define void @v_shuffle_v4f16_v4f16__7_7_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_alignbit_b32 v1, s0, v0, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_alignbit_b32 v1, s0, v0, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -865,22 +865,22 @@ define void @v_shuffle_v4f16_v4f16__7_7_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -923,22 +923,22 @@ define void @v_shuffle_v4f16_v4f16__7_7_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_alignbit_b32 v1, s0, v1, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -975,20 +975,20 @@ define void @v_shuffle_v4f16_v4f16__7_7_4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -1025,20 +1025,20 @@ define void @v_shuffle_v4f16_v4f16__7_7_5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v3, s0, v0, 16 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v3, s0, v0, 16 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -1073,18 +1073,18 @@ define void @v_shuffle_v4f16_v4f16__7_7_6_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -1121,20 +1121,20 @@ define void @v_shuffle_v4f16_v4f16__7_7_7_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v3, s0, v1, 16 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v3, s0, v1, 16 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -1177,23 +1177,23 @@ define void @v_shuffle_v4f16_v4f16__7_7_7_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v3, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v3, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -1236,23 +1236,23 @@ define void @v_shuffle_v4f16_v4f16__7_7_7_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v3, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -1295,23 +1295,23 @@ define void @v_shuffle_v4f16_v4f16__7_7_7_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -1354,23 +1354,23 @@ define void @v_shuffle_v4f16_v4f16__7_7_7_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v3, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v3, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -1407,20 +1407,20 @@ define void @v_shuffle_v4f16_v4f16__7_7_7_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v3, v0, v1, 16 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v3, v0, v1, 16 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -1457,20 +1457,20 @@ define void @v_shuffle_v4f16_v4f16__7_7_7_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -1507,19 +1507,19 @@ define void @v_shuffle_v4f16_v4f16__7_7_7_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -1556,19 +1556,19 @@ define void @v_shuffle_v4f16_v4f16__7_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -1605,19 +1605,19 @@ define void @v_shuffle_v4f16_v4f16__u_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1653,19 +1653,19 @@ define void @v_shuffle_v4f16_v4f16__0_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__0_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> zeroinitializer store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1701,19 +1701,19 @@ define void @v_shuffle_v4f16_v4f16__1_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1749,20 +1749,20 @@ define void @v_shuffle_v4f16_v4f16__2_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1798,20 +1798,20 @@ define void @v_shuffle_v4f16_v4f16__3_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v0, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v0, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1847,19 +1847,19 @@ define void @v_shuffle_v4f16_v4f16__4_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__4_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__4_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1901,22 +1901,22 @@ define void @v_shuffle_v4f16_v4f16__5_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__5_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v2, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__5_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v2, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -1959,23 +1959,23 @@ define void @v_shuffle_v4f16_v4f16__6_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__6_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v3, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__6_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v3, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -2018,22 +2018,22 @@ define void @v_shuffle_v4f16_v4f16__7_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -2076,22 +2076,22 @@ define void @v_shuffle_v4f16_v4f16__7_u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -2136,23 +2136,23 @@ define void @v_shuffle_v4f16_v4f16__7_1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -2195,23 +2195,23 @@ define void @v_shuffle_v4f16_v4f16__7_2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v5, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v4, v1, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v5, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v4, v1, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -2256,23 +2256,23 @@ define void @v_shuffle_v4f16_v4f16__7_3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -2315,22 +2315,22 @@ define void @v_shuffle_v4f16_v4f16__7_4_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -2375,24 +2375,24 @@ define void @v_shuffle_v4f16_v4f16__7_5_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v2, v2, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v2, v2, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -2435,22 +2435,22 @@ define void @v_shuffle_v4f16_v4f16__7_6_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_6_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_6_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -2495,24 +2495,24 @@ define void @v_shuffle_v4f16_v4f16__7_7_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -2555,22 +2555,22 @@ define void @v_shuffle_v4f16_v4f16__7_7_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v2, v3, v3, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v2, v3, v3, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -2613,22 +2613,22 @@ define void @v_shuffle_v4f16_v4f16__7_7_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_alignbit_b32 v1, v0, v0, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_alignbit_b32 v1, v0, v0, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -2673,24 +2673,24 @@ define void @v_shuffle_v4f16_v4f16__7_7_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v0, v1, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -2733,22 +2733,22 @@ define void @v_shuffle_v4f16_v4f16__7_7_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_alignbit_b32 v1, v0, v1, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_alignbit_b32 v1, v0, v1, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -2793,23 +2793,23 @@ define void @v_shuffle_v4f16_v4f16__7_7_4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v2, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v2, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -2852,23 +2852,23 @@ define void @v_shuffle_v4f16_v4f16__7_7_5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v2, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v2, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -2913,23 +2913,23 @@ define void @v_shuffle_v4f16_v4f16__7_7_6_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -2964,18 +2964,18 @@ define void @v_shuffle_v4f16_v4f16__u_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__u_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__u_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3009,18 +3009,18 @@ define void @v_shuffle_v4f16_v4f16__0_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__0_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__0_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3056,19 +3056,19 @@ define void @v_shuffle_v4f16_v4f16__1_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__1_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__1_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3106,20 +3106,20 @@ define void @v_shuffle_v4f16_v4f16__2_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__2_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v2, s2, v1, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__2_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v2, s2, v1, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3155,20 +3155,20 @@ define void @v_shuffle_v4f16_v4f16__3_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__3_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3202,18 +3202,18 @@ define void @v_shuffle_v4f16_v4f16__4_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__4_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__4_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3255,23 +3255,23 @@ define void @v_shuffle_v4f16_v4f16__5_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__5_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v2, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__5_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v2, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -3316,23 +3316,23 @@ define void @v_shuffle_v4f16_v4f16__6_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__6_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v2, s2, v3, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__6_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v2, s2, v3, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -3375,23 +3375,23 @@ define void @v_shuffle_v4f16_v4f16__7_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v3, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v3, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -3434,22 +3434,22 @@ define void @v_shuffle_v4f16_v4f16__7_u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -3492,22 +3492,22 @@ define void @v_shuffle_v4f16_v4f16__7_0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -3550,23 +3550,23 @@ define void @v_shuffle_v4f16_v4f16__7_2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v5, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v4, v1, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v5, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v4, v1, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -3609,23 +3609,23 @@ define void @v_shuffle_v4f16_v4f16__7_3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v3, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v3, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -3668,22 +3668,22 @@ define void @v_shuffle_v4f16_v4f16__7_4_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -3726,22 +3726,22 @@ define void @v_shuffle_v4f16_v4f16__7_5_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v2, v2, v3, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v2, v2, v3, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -3784,22 +3784,22 @@ define void @v_shuffle_v4f16_v4f16__7_6_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_6_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_6_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -3842,22 +3842,22 @@ define void @v_shuffle_v4f16_v4f16__7_7_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -3900,22 +3900,22 @@ define void @v_shuffle_v4f16_v4f16__7_7_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v2, v3, v3, s2 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v2, v3, v3, s2 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -3958,22 +3958,22 @@ define void @v_shuffle_v4f16_v4f16__7_7_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v2, v3, v3, s2 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v2, v3, v3, s2 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -4018,24 +4018,24 @@ define void @v_shuffle_v4f16_v4f16__7_7_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_bfi_b32 v1, s2, v1, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -4078,22 +4078,22 @@ define void @v_shuffle_v4f16_v4f16__7_7_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -4138,23 +4138,23 @@ define void @v_shuffle_v4f16_v4f16__7_7_4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v2, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v2, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -4197,23 +4197,23 @@ define void @v_shuffle_v4f16_v4f16__7_7_5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v2, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v2, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -4258,23 +4258,23 @@ define void @v_shuffle_v4f16_v4f16__7_7_6_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v3, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -4311,20 +4311,20 @@ define void @v_shuffle_v4f16_v4f16__u_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__u_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__u_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4360,19 +4360,19 @@ define void @v_shuffle_v4f16_v4f16__0_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__0_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4408,20 +4408,20 @@ define void @v_shuffle_v4f16_v4f16__1_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__1_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4457,19 +4457,19 @@ define void @v_shuffle_v4f16_v4f16__2_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__2_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__2_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4505,20 +4505,20 @@ define void @v_shuffle_v4f16_v4f16__3_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__3_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__3_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4554,20 +4554,20 @@ define void @v_shuffle_v4f16_v4f16__4_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__4_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__4_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4609,23 +4609,23 @@ define void @v_shuffle_v4f16_v4f16__5_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__5_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v2, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__5_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v2, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -4668,23 +4668,23 @@ define void @v_shuffle_v4f16_v4f16__6_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__6_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__6_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -4727,23 +4727,23 @@ define void @v_shuffle_v4f16_v4f16__7_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v5, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v4, v1, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v5, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v4, v1, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -4786,22 +4786,22 @@ define void @v_shuffle_v4f16_v4f16__7_u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -4844,22 +4844,22 @@ define void @v_shuffle_v4f16_v4f16__7_0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -4904,23 +4904,23 @@ define void @v_shuffle_v4f16_v4f16__7_1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -4965,23 +4965,23 @@ define void @v_shuffle_v4f16_v4f16__7_3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -5024,22 +5024,22 @@ define void @v_shuffle_v4f16_v4f16__7_4_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -5084,23 +5084,23 @@ define void @v_shuffle_v4f16_v4f16__7_5_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -5143,22 +5143,22 @@ define void @v_shuffle_v4f16_v4f16__7_6_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_6_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_6_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -5203,23 +5203,23 @@ define void @v_shuffle_v4f16_v4f16__7_7_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -5262,22 +5262,22 @@ define void @v_shuffle_v4f16_v4f16__7_7_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -5322,24 +5322,24 @@ define void @v_shuffle_v4f16_v4f16__7_7_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v1, v0, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v1, v0, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -5382,22 +5382,22 @@ define void @v_shuffle_v4f16_v4f16__7_7_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v2, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v3, v1, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v2, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v3, v1, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -5440,22 +5440,22 @@ define void @v_shuffle_v4f16_v4f16__7_7_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_alignbit_b32 v1, v1, v1, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -5500,23 +5500,23 @@ define void @v_shuffle_v4f16_v4f16__7_7_4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v2, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v2, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -5559,23 +5559,23 @@ define void @v_shuffle_v4f16_v4f16__7_7_5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v2, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v2, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -5620,23 +5620,23 @@ define void @v_shuffle_v4f16_v4f16__7_7_6_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -5672,20 +5672,20 @@ define void @v_shuffle_v4f16_v4f16__u_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__u_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__u_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5723,20 +5723,20 @@ define void @v_shuffle_v4f16_v4f16__0_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__0_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v1 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5772,19 +5772,19 @@ define void @v_shuffle_v4f16_v4f16__1_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5819,20 +5819,20 @@ define void @v_shuffle_v4f16_v4f16__2_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__2_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5868,19 +5868,19 @@ define void @v_shuffle_v4f16_v4f16__3_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__3_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__3_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5915,20 +5915,20 @@ define void @v_shuffle_v4f16_v4f16__4_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__4_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__4_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5970,23 +5970,23 @@ define void @v_shuffle_v4f16_v4f16__5_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__5_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v2, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__5_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v2, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -6031,23 +6031,23 @@ define void @v_shuffle_v4f16_v4f16__6_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__6_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v1 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__6_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v1 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -6090,23 +6090,23 @@ define void @v_shuffle_v4f16_v4f16__7_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -6149,22 +6149,22 @@ define void @v_shuffle_v4f16_v4f16__7_u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -6207,22 +6207,22 @@ define void @v_shuffle_v4f16_v4f16__7_0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -6265,22 +6265,22 @@ define void @v_shuffle_v4f16_v4f16__7_1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -6323,23 +6323,23 @@ define void @v_shuffle_v4f16_v4f16__7_2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v5, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v4, v1, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v5, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v4, v1, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -6382,22 +6382,22 @@ define void @v_shuffle_v4f16_v4f16__7_4_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -6440,22 +6440,22 @@ define void @v_shuffle_v4f16_v4f16__7_5_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_perm_b32 v0, v2, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -6498,22 +6498,22 @@ define void @v_shuffle_v4f16_v4f16__7_6_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_6_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_6_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -6556,22 +6556,22 @@ define void @v_shuffle_v4f16_v4f16__7_7_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -6612,22 +6612,22 @@ define void @v_shuffle_v4f16_v4f16__7_7_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -6672,24 +6672,24 @@ define void @v_shuffle_v4f16_v4f16__7_7_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_bfi_b32 v1, s2, v0, v1 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_bfi_b32 v1, s2, v0, v1 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -6732,22 +6732,22 @@ define void @v_shuffle_v4f16_v4f16__7_7_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v0, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v0, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -6788,22 +6788,22 @@ define void @v_shuffle_v4f16_v4f16__7_7_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -6848,23 +6848,23 @@ define void @v_shuffle_v4f16_v4f16__7_7_4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v2, v1 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v2, v1 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -6907,23 +6907,23 @@ define void @v_shuffle_v4f16_v4f16__7_7_5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v2, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v2, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -6968,23 +6968,23 @@ define void @v_shuffle_v4f16_v4f16__7_7_6_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v1 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v3, v1 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -7026,16 +7026,16 @@ define void @v_shuffle_v4f16_v4f16__0_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__0_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__0_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7067,17 +7067,17 @@ define void @v_shuffle_v4f16_v4f16__1_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__1_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__1_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7109,17 +7109,17 @@ define void @v_shuffle_v4f16_v4f16__2_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__2_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__2_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7151,17 +7151,17 @@ define void @v_shuffle_v4f16_v4f16__3_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__3_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__3_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> store <4 x half> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7208,19 +7208,19 @@ define void @v_shuffle_v4f16_v4f16__5_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__5_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__5_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -7257,20 +7257,20 @@ define void @v_shuffle_v4f16_v4f16__6_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__6_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__6_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -7307,20 +7307,20 @@ define void @v_shuffle_v4f16_v4f16__7_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v0, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v0, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -7357,20 +7357,20 @@ define void @v_shuffle_v4f16_v4f16__7_u_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v2, s0, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v2, s0, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -7413,23 +7413,23 @@ define void @v_shuffle_v4f16_v4f16__7_0_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -7474,23 +7474,23 @@ define void @v_shuffle_v4f16_v4f16__7_1_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -7533,23 +7533,23 @@ define void @v_shuffle_v4f16_v4f16__7_2_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v5, v2, v2, s2 -; GFX940-NEXT: v_alignbit_b32 v4, v1, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v5, v2, v2, s2 +; GFX942-NEXT: v_alignbit_b32 v4, v1, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -7594,23 +7594,23 @@ define void @v_shuffle_v4f16_v4f16__7_3_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -7649,20 +7649,20 @@ define void @v_shuffle_v4f16_v4f16__7_5_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v1, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v1, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -7699,20 +7699,20 @@ define void @v_shuffle_v4f16_v4f16__7_6_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_6_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_6_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -7751,20 +7751,20 @@ define void @v_shuffle_v4f16_v4f16__7_7_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -7801,20 +7801,20 @@ define void @v_shuffle_v4f16_v4f16__7_7_u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -7859,23 +7859,23 @@ define void @v_shuffle_v4f16_v4f16__7_7_0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v0, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v0, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -7918,23 +7918,23 @@ define void @v_shuffle_v4f16_v4f16__7_7_1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v2, v0, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v2, v0, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -7979,23 +7979,23 @@ define void @v_shuffle_v4f16_v4f16__7_7_2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v1, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v1, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -8038,23 +8038,23 @@ define void @v_shuffle_v4f16_v4f16__7_7_3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v2, v1, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v2, v1, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -8091,20 +8091,20 @@ define void @v_shuffle_v4f16_v4f16__7_7_5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v3, v0, v0, 16 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v3, v0, v0, 16 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -8143,20 +8143,20 @@ define void @v_shuffle_v4f16_v4f16__7_7_6_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v1, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v1, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -8191,18 +8191,18 @@ define void @v_shuffle_v4f16_v4f16__u_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__u_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__u_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -8247,23 +8247,23 @@ define void @v_shuffle_v4f16_v4f16__0_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__0_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__0_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -8306,23 +8306,23 @@ define void @v_shuffle_v4f16_v4f16__1_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__1_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__1_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -8367,23 +8367,23 @@ define void @v_shuffle_v4f16_v4f16__2_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__2_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__2_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -8426,23 +8426,23 @@ define void @v_shuffle_v4f16_v4f16__3_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__3_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v1, s2 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__3_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v1, s2 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -8477,18 +8477,18 @@ define void @v_shuffle_v4f16_v4f16__4_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__4_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__4_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -8525,19 +8525,19 @@ define void @v_shuffle_v4f16_v4f16__5_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__5_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__5_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -8576,20 +8576,20 @@ define void @v_shuffle_v4f16_v4f16__6_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__6_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v2, s2, v1, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__6_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v2, s2, v1, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -8626,20 +8626,20 @@ define void @v_shuffle_v4f16_v4f16__7_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -8676,20 +8676,20 @@ define void @v_shuffle_v4f16_v4f16__7_u_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v2, s0, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v2, s0, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -8732,23 +8732,23 @@ define void @v_shuffle_v4f16_v4f16__7_0_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -8791,23 +8791,23 @@ define void @v_shuffle_v4f16_v4f16__7_1_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -8850,23 +8850,23 @@ define void @v_shuffle_v4f16_v4f16__7_2_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v5, v2, v2, s2 -; GFX940-NEXT: v_alignbit_b32 v4, v1, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v5, v2, v2, s2 +; GFX942-NEXT: v_alignbit_b32 v4, v1, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -8909,23 +8909,23 @@ define void @v_shuffle_v4f16_v4f16__7_3_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -8962,20 +8962,20 @@ define void @v_shuffle_v4f16_v4f16__7_4_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v0, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v0, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -9012,20 +9012,20 @@ define void @v_shuffle_v4f16_v4f16__7_6_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_6_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_6_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -9062,20 +9062,20 @@ define void @v_shuffle_v4f16_v4f16__7_7_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -9112,20 +9112,20 @@ define void @v_shuffle_v4f16_v4f16__7_7_u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -9170,23 +9170,23 @@ define void @v_shuffle_v4f16_v4f16__7_7_0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v0, v2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v0, v2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -9229,23 +9229,23 @@ define void @v_shuffle_v4f16_v4f16__7_7_1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v0, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v0, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -9290,23 +9290,23 @@ define void @v_shuffle_v4f16_v4f16__7_7_2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v1, v2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -9349,23 +9349,23 @@ define void @v_shuffle_v4f16_v4f16__7_7_3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v1, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v1, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -9402,20 +9402,20 @@ define void @v_shuffle_v4f16_v4f16__7_7_4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -9454,20 +9454,20 @@ define void @v_shuffle_v4f16_v4f16__7_7_6_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v3, s2, v1, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v3, s2, v1, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -9504,20 +9504,20 @@ define void @v_shuffle_v4f16_v4f16__u_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__u_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__u_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -9560,23 +9560,23 @@ define void @v_shuffle_v4f16_v4f16__0_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__0_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__0_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -9619,23 +9619,23 @@ define void @v_shuffle_v4f16_v4f16__1_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__1_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__1_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -9678,23 +9678,23 @@ define void @v_shuffle_v4f16_v4f16__2_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__2_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__2_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -9737,23 +9737,23 @@ define void @v_shuffle_v4f16_v4f16__3_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__3_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v5, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v4, v3, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__3_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v5, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v4, v3, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -9790,19 +9790,19 @@ define void @v_shuffle_v4f16_v4f16__4_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__4_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__4_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -9839,20 +9839,20 @@ define void @v_shuffle_v4f16_v4f16__5_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__5_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__5_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -9889,19 +9889,19 @@ define void @v_shuffle_v4f16_v4f16__6_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__6_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__6_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -9938,20 +9938,20 @@ define void @v_shuffle_v4f16_v4f16__7_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -9988,19 +9988,19 @@ define void @v_shuffle_v4f16_v4f16__7_u_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_u_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_u_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -10043,23 +10043,23 @@ define void @v_shuffle_v4f16_v4f16__7_0_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_0_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_0_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -10104,23 +10104,23 @@ define void @v_shuffle_v4f16_v4f16__7_1_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_1_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_1_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -10163,23 +10163,23 @@ define void @v_shuffle_v4f16_v4f16__7_2_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_2_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v5, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v4, v1, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_2_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v5, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v4, v1, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -10224,23 +10224,23 @@ define void @v_shuffle_v4f16_v4f16__7_3_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_3_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_3_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -10277,19 +10277,19 @@ define void @v_shuffle_v4f16_v4f16__7_4_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_4_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_4_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v1, 16 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -10328,20 +10328,20 @@ define void @v_shuffle_v4f16_v4f16__7_5_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_5_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_5_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -10380,20 +10380,20 @@ define void @v_shuffle_v4f16_v4f16__7_7_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -10430,19 +10430,19 @@ define void @v_shuffle_v4f16_v4f16__7_7_u_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -10487,23 +10487,23 @@ define void @v_shuffle_v4f16_v4f16__7_7_0_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v0, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v0, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -10546,22 +10546,22 @@ define void @v_shuffle_v4f16_v4f16__7_7_1_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v2, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v3, v3, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v2, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v3, v3, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -10606,23 +10606,23 @@ define void @v_shuffle_v4f16_v4f16__7_7_2_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v1, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v1, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -10665,23 +10665,23 @@ define void @v_shuffle_v4f16_v4f16__7_7_3_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v1, v3, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v1, v3, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -10720,20 +10720,20 @@ define void @v_shuffle_v4f16_v4f16__7_7_4_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v0, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v0, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -10770,20 +10770,20 @@ define void @v_shuffle_v4f16_v4f16__7_7_5_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v3, v1, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v3, v1, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -10819,20 +10819,20 @@ define void @v_shuffle_v4f16_v4f16__u_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__u_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__u_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -10877,23 +10877,23 @@ define void @v_shuffle_v4f16_v4f16__0_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__0_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v3 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__0_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v3 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -10936,23 +10936,23 @@ define void @v_shuffle_v4f16_v4f16__1_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__1_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__1_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -10997,23 +10997,23 @@ define void @v_shuffle_v4f16_v4f16__2_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__2_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v3 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__2_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v3 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -11056,23 +11056,23 @@ define void @v_shuffle_v4f16_v4f16__3_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__3_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__3_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -11111,20 +11111,20 @@ define void @v_shuffle_v4f16_v4f16__4_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__4_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__4_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v1 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -11161,19 +11161,19 @@ define void @v_shuffle_v4f16_v4f16__5_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__5_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__5_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -11209,20 +11209,20 @@ define void @v_shuffle_v4f16_v4f16__6_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__6_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__6_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -11259,20 +11259,20 @@ define void @v_shuffle_v4f16_v4f16__7_u_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_u_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v2, s0, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_u_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v2, s0, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -11315,23 +11315,23 @@ define void @v_shuffle_v4f16_v4f16__7_0_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_0_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_0_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -11374,23 +11374,23 @@ define void @v_shuffle_v4f16_v4f16__7_1_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_1_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_1_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -11433,23 +11433,23 @@ define void @v_shuffle_v4f16_v4f16__7_2_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_2_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v5, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v4, v1, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_2_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v5, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v4, v1, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -11492,23 +11492,23 @@ define void @v_shuffle_v4f16_v4f16__7_3_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_3_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_3_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -11545,20 +11545,20 @@ define void @v_shuffle_v4f16_v4f16__7_4_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_4_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v0, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_4_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v0, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -11595,19 +11595,19 @@ define void @v_shuffle_v4f16_v4f16__7_5_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_5_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_5_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -11644,20 +11644,20 @@ define void @v_shuffle_v4f16_v4f16__7_6_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_6_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_6_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -11692,18 +11692,18 @@ define void @v_shuffle_v4f16_v4f16__7_7_u_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -11748,23 +11748,23 @@ define void @v_shuffle_v4f16_v4f16__7_7_0_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v0, v3 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v0, v3 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -11807,23 +11807,23 @@ define void @v_shuffle_v4f16_v4f16__7_7_1_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v0, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v0, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -11868,23 +11868,23 @@ define void @v_shuffle_v4f16_v4f16__7_7_2_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v3 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v1, v3 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -11927,23 +11927,23 @@ define void @v_shuffle_v4f16_v4f16__7_7_3_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v1, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v1, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -11982,20 +11982,20 @@ define void @v_shuffle_v4f16_v4f16__7_7_4_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v3, s2, v0, v1 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v3, s2, v0, v1 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -12032,20 +12032,20 @@ define void @v_shuffle_v4f16_v4f16__7_7_5_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v0, s2 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v0, s2 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -12080,18 +12080,18 @@ define void @v_shuffle_v4f16_v4f16__7_7_6_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f16_v4f16__7_7_6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f16_v4f16__7_7_6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=v"() %vec1 = call <4 x half> asm "; def $0", "=v"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -12136,17 +12136,17 @@ define void @s_shuffle_v4f16_v4f16__0_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -12178,17 +12178,17 @@ define void @s_shuffle_v4f16_v4f16__1_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -12220,17 +12220,17 @@ define void @s_shuffle_v4f16_v4f16__2_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__2_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__2_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -12262,17 +12262,17 @@ define void @s_shuffle_v4f16_v4f16__3_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__3_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__3_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -12318,17 +12318,17 @@ define void @s_shuffle_v4f16_v4f16__5_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__5_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__5_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -12361,17 +12361,17 @@ define void @s_shuffle_v4f16_v4f16__6_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__6_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__6_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -12404,17 +12404,17 @@ define void @s_shuffle_v4f16_v4f16__7_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -12455,21 +12455,21 @@ define void @s_shuffle_v4f16_v4f16__7_0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -12512,22 +12512,22 @@ define void @s_shuffle_v4f16_v4f16__7_1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -12568,21 +12568,21 @@ define void @s_shuffle_v4f16_v4f16__7_2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -12625,22 +12625,22 @@ define void @s_shuffle_v4f16_v4f16__7_3_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -12675,18 +12675,18 @@ define void @s_shuffle_v4f16_v4f16__7_4_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -12723,19 +12723,19 @@ define void @s_shuffle_v4f16_v4f16__7_5_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -12770,18 +12770,18 @@ define void @s_shuffle_v4f16_v4f16__7_6_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_6_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_6_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -12816,18 +12816,18 @@ define void @s_shuffle_v4f16_v4f16__7_7_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -12870,22 +12870,22 @@ define void @s_shuffle_v4f16_v4f16__7_7_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -12928,22 +12928,22 @@ define void @s_shuffle_v4f16_v4f16__7_7_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -12984,21 +12984,21 @@ define void @s_shuffle_v4f16_v4f16__7_7_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -13041,22 +13041,22 @@ define void @s_shuffle_v4f16_v4f16__7_7_3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -13093,19 +13093,19 @@ define void @s_shuffle_v4f16_v4f16__7_7_4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -13142,19 +13142,19 @@ define void @s_shuffle_v4f16_v4f16__7_7_5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -13189,18 +13189,18 @@ define void @s_shuffle_v4f16_v4f16__7_7_6_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -13235,18 +13235,18 @@ define void @s_shuffle_v4f16_v4f16__7_7_7_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s9, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -13289,22 +13289,22 @@ define void @s_shuffle_v4f16_v4f16__7_7_7_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -13349,23 +13349,23 @@ define void @s_shuffle_v4f16_v4f16__7_7_7_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -13408,22 +13408,22 @@ define void @s_shuffle_v4f16_v4f16__7_7_7_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -13468,23 +13468,23 @@ define void @s_shuffle_v4f16_v4f16__7_7_7_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -13521,19 +13521,19 @@ define void @s_shuffle_v4f16_v4f16__7_7_7_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -13572,20 +13572,20 @@ define void @s_shuffle_v4f16_v4f16__7_7_7_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -13622,19 +13622,19 @@ define void @s_shuffle_v4f16_v4f16__7_7_7_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -13671,19 +13671,19 @@ define void @s_shuffle_v4f16_v4f16__7_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -13718,18 +13718,18 @@ define void @s_shuffle_v4f16_v4f16__u_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -13763,18 +13763,18 @@ define void @s_shuffle_v4f16_v4f16__0_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__0_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> zeroinitializer call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -13810,19 +13810,19 @@ define void @s_shuffle_v4f16_v4f16__1_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -13856,18 +13856,18 @@ define void @s_shuffle_v4f16_v4f16__2_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -13903,19 +13903,19 @@ define void @s_shuffle_v4f16_v4f16__3_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -13949,18 +13949,18 @@ define void @s_shuffle_v4f16_v4f16__4_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__4_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__4_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -14002,22 +14002,22 @@ define void @s_shuffle_v4f16_v4f16__5_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__5_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__5_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -14058,21 +14058,21 @@ define void @s_shuffle_v4f16_v4f16__6_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__6_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__6_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -14115,22 +14115,22 @@ define void @s_shuffle_v4f16_v4f16__7_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -14171,21 +14171,21 @@ define void @s_shuffle_v4f16_v4f16__7_u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -14230,23 +14230,23 @@ define void @s_shuffle_v4f16_v4f16__7_1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -14289,22 +14289,22 @@ define void @s_shuffle_v4f16_v4f16__7_2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -14349,23 +14349,23 @@ define void @s_shuffle_v4f16_v4f16__7_3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -14408,22 +14408,22 @@ define void @s_shuffle_v4f16_v4f16__7_4_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -14468,23 +14468,23 @@ define void @s_shuffle_v4f16_v4f16__7_5_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -14527,22 +14527,22 @@ define void @s_shuffle_v4f16_v4f16__7_6_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_6_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_6_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -14585,22 +14585,22 @@ define void @s_shuffle_v4f16_v4f16__7_7_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -14643,22 +14643,22 @@ define void @s_shuffle_v4f16_v4f16__7_7_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_lshl_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_lshl_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -14703,23 +14703,23 @@ define void @s_shuffle_v4f16_v4f16__7_7_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -14762,22 +14762,22 @@ define void @s_shuffle_v4f16_v4f16__7_7_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -14822,23 +14822,23 @@ define void @s_shuffle_v4f16_v4f16__7_7_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -14881,22 +14881,22 @@ define void @s_shuffle_v4f16_v4f16__7_7_4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s0 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s0 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -14941,23 +14941,23 @@ define void @s_shuffle_v4f16_v4f16__7_7_5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -15000,22 +15000,22 @@ define void @s_shuffle_v4f16_v4f16__7_7_6_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s0 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s0 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -15050,18 +15050,18 @@ define void @s_shuffle_v4f16_v4f16__u_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__u_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__u_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -15095,18 +15095,18 @@ define void @s_shuffle_v4f16_v4f16__0_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__0_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__0_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -15142,19 +15142,19 @@ define void @s_shuffle_v4f16_v4f16__1_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__1_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__1_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -15190,19 +15190,19 @@ define void @s_shuffle_v4f16_v4f16__2_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__2_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__2_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -15240,20 +15240,20 @@ define void @s_shuffle_v4f16_v4f16__3_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__3_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -15287,18 +15287,18 @@ define void @s_shuffle_v4f16_v4f16__4_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__4_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__4_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -15342,23 +15342,23 @@ define void @s_shuffle_v4f16_v4f16__5_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__5_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__5_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -15401,22 +15401,22 @@ define void @s_shuffle_v4f16_v4f16__6_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__6_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__6_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -15461,23 +15461,23 @@ define void @s_shuffle_v4f16_v4f16__7_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -15520,22 +15520,22 @@ define void @s_shuffle_v4f16_v4f16__7_u_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_lshr_b32 s8, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_lshr_b32 s8, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -15580,23 +15580,23 @@ define void @s_shuffle_v4f16_v4f16__7_0_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -15641,23 +15641,23 @@ define void @s_shuffle_v4f16_v4f16__7_2_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -15704,24 +15704,24 @@ define void @s_shuffle_v4f16_v4f16__7_3_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -15766,23 +15766,23 @@ define void @s_shuffle_v4f16_v4f16__7_4_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -15829,24 +15829,24 @@ define void @s_shuffle_v4f16_v4f16__7_5_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -15891,23 +15891,23 @@ define void @s_shuffle_v4f16_v4f16__7_6_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_6_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_6_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -15952,23 +15952,23 @@ define void @s_shuffle_v4f16_v4f16__7_7_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -16011,22 +16011,22 @@ define void @s_shuffle_v4f16_v4f16__7_7_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -16069,22 +16069,22 @@ define void @s_shuffle_v4f16_v4f16__7_7_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -16129,23 +16129,23 @@ define void @s_shuffle_v4f16_v4f16__7_7_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -16192,24 +16192,24 @@ define void @s_shuffle_v4f16_v4f16__7_7_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -16254,23 +16254,23 @@ define void @s_shuffle_v4f16_v4f16__7_7_4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s0 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s0 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -16317,24 +16317,24 @@ define void @s_shuffle_v4f16_v4f16__7_7_5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -16379,23 +16379,23 @@ define void @s_shuffle_v4f16_v4f16__7_7_6_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s0 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s0 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -16430,18 +16430,18 @@ define void @s_shuffle_v4f16_v4f16__u_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__u_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__u_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -16475,18 +16475,18 @@ define void @s_shuffle_v4f16_v4f16__0_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__0_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -16522,19 +16522,19 @@ define void @s_shuffle_v4f16_v4f16__1_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__1_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -16568,18 +16568,18 @@ define void @s_shuffle_v4f16_v4f16__2_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__2_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__2_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -16615,19 +16615,19 @@ define void @s_shuffle_v4f16_v4f16__3_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__3_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__3_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -16661,18 +16661,18 @@ define void @s_shuffle_v4f16_v4f16__4_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__4_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__4_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -16714,22 +16714,22 @@ define void @s_shuffle_v4f16_v4f16__5_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__5_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__5_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -16770,21 +16770,21 @@ define void @s_shuffle_v4f16_v4f16__6_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__6_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__6_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -16827,22 +16827,22 @@ define void @s_shuffle_v4f16_v4f16__7_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -16883,21 +16883,21 @@ define void @s_shuffle_v4f16_v4f16__7_u_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -16940,22 +16940,22 @@ define void @s_shuffle_v4f16_v4f16__7_0_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -17000,23 +17000,23 @@ define void @s_shuffle_v4f16_v4f16__7_1_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -17061,23 +17061,23 @@ define void @s_shuffle_v4f16_v4f16__7_3_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -17120,22 +17120,22 @@ define void @s_shuffle_v4f16_v4f16__7_4_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -17180,23 +17180,23 @@ define void @s_shuffle_v4f16_v4f16__7_5_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -17239,22 +17239,22 @@ define void @s_shuffle_v4f16_v4f16__7_6_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_6_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_6_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -17297,22 +17297,22 @@ define void @s_shuffle_v4f16_v4f16__7_7_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -17355,22 +17355,22 @@ define void @s_shuffle_v4f16_v4f16__7_7_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_lshl_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_lshl_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -17413,22 +17413,22 @@ define void @s_shuffle_v4f16_v4f16__7_7_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -17473,23 +17473,23 @@ define void @s_shuffle_v4f16_v4f16__7_7_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -17534,23 +17534,23 @@ define void @s_shuffle_v4f16_v4f16__7_7_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -17593,22 +17593,22 @@ define void @s_shuffle_v4f16_v4f16__7_7_4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -17653,23 +17653,23 @@ define void @s_shuffle_v4f16_v4f16__7_7_5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -17712,22 +17712,22 @@ define void @s_shuffle_v4f16_v4f16__7_7_6_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -17764,19 +17764,19 @@ define void @s_shuffle_v4f16_v4f16__u_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__u_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__u_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -17812,19 +17812,19 @@ define void @s_shuffle_v4f16_v4f16__0_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__0_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -17862,20 +17862,20 @@ define void @s_shuffle_v4f16_v4f16__1_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -17911,19 +17911,19 @@ define void @s_shuffle_v4f16_v4f16__2_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__2_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -17959,19 +17959,19 @@ define void @s_shuffle_v4f16_v4f16__3_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__3_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__3_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -18007,19 +18007,19 @@ define void @s_shuffle_v4f16_v4f16__4_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__4_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__4_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -18063,23 +18063,23 @@ define void @s_shuffle_v4f16_v4f16__5_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__5_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__5_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -18122,22 +18122,22 @@ define void @s_shuffle_v4f16_v4f16__6_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__6_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__6_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -18182,23 +18182,23 @@ define void @s_shuffle_v4f16_v4f16__7_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -18241,22 +18241,22 @@ define void @s_shuffle_v4f16_v4f16__7_u_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_lshr_b32 s8, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_lshr_b32 s8, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -18301,23 +18301,23 @@ define void @s_shuffle_v4f16_v4f16__7_0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -18364,24 +18364,24 @@ define void @s_shuffle_v4f16_v4f16__7_1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -18426,23 +18426,23 @@ define void @s_shuffle_v4f16_v4f16__7_2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -18487,23 +18487,23 @@ define void @s_shuffle_v4f16_v4f16__7_4_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -18550,24 +18550,24 @@ define void @s_shuffle_v4f16_v4f16__7_5_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -18612,23 +18612,23 @@ define void @s_shuffle_v4f16_v4f16__7_6_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_6_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_6_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -18673,23 +18673,23 @@ define void @s_shuffle_v4f16_v4f16__7_7_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -18730,21 +18730,21 @@ define void @s_shuffle_v4f16_v4f16__7_7_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -18789,23 +18789,23 @@ define void @s_shuffle_v4f16_v4f16__7_7_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -18852,24 +18852,24 @@ define void @s_shuffle_v4f16_v4f16__7_7_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -18910,21 +18910,21 @@ define void @s_shuffle_v4f16_v4f16__7_7_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -18969,23 +18969,23 @@ define void @s_shuffle_v4f16_v4f16__7_7_4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s0 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s0 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -19032,24 +19032,24 @@ define void @s_shuffle_v4f16_v4f16__7_7_5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -19094,23 +19094,23 @@ define void @s_shuffle_v4f16_v4f16__7_7_6_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s0 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s0 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -19155,17 +19155,17 @@ define void @s_shuffle_v4f16_v4f16__0_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__0_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__0_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -19197,17 +19197,17 @@ define void @s_shuffle_v4f16_v4f16__1_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__1_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__1_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -19239,17 +19239,17 @@ define void @s_shuffle_v4f16_v4f16__2_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__2_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__2_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -19281,17 +19281,17 @@ define void @s_shuffle_v4f16_v4f16__3_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__3_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__3_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %shuf) @@ -19341,19 +19341,19 @@ define void @s_shuffle_v4f16_v4f16__5_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__5_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__5_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -19388,18 +19388,18 @@ define void @s_shuffle_v4f16_v4f16__6_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__6_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__6_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -19436,19 +19436,19 @@ define void @s_shuffle_v4f16_v4f16__7_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -19483,18 +19483,18 @@ define void @s_shuffle_v4f16_v4f16__7_u_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -19537,22 +19537,22 @@ define void @s_shuffle_v4f16_v4f16__7_0_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -19597,23 +19597,23 @@ define void @s_shuffle_v4f16_v4f16__7_1_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -19656,22 +19656,22 @@ define void @s_shuffle_v4f16_v4f16__7_2_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -19716,23 +19716,23 @@ define void @s_shuffle_v4f16_v4f16__7_3_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -19771,20 +19771,20 @@ define void @s_shuffle_v4f16_v4f16__7_5_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -19821,19 +19821,19 @@ define void @s_shuffle_v4f16_v4f16__7_6_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_6_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_6_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -19870,19 +19870,19 @@ define void @s_shuffle_v4f16_v4f16__7_7_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -19919,19 +19919,19 @@ define void @s_shuffle_v4f16_v4f16__7_7_u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_lshl_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_lshl_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -19974,22 +19974,22 @@ define void @s_shuffle_v4f16_v4f16__7_7_0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s2 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s2 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -20034,23 +20034,23 @@ define void @s_shuffle_v4f16_v4f16__7_7_1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s2 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s2 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -20093,22 +20093,22 @@ define void @s_shuffle_v4f16_v4f16__7_7_2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -20153,23 +20153,23 @@ define void @s_shuffle_v4f16_v4f16__7_7_3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s2 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s2 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -20208,20 +20208,20 @@ define void @s_shuffle_v4f16_v4f16__7_7_5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s0 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s0 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -20258,19 +20258,19 @@ define void @s_shuffle_v4f16_v4f16__7_7_6_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -20305,18 +20305,18 @@ define void @s_shuffle_v4f16_v4f16__u_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__u_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__u_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -20359,22 +20359,22 @@ define void @s_shuffle_v4f16_v4f16__0_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__0_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__0_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -20419,23 +20419,23 @@ define void @s_shuffle_v4f16_v4f16__1_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__1_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__1_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -20478,22 +20478,22 @@ define void @s_shuffle_v4f16_v4f16__2_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__2_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__2_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -20538,23 +20538,23 @@ define void @s_shuffle_v4f16_v4f16__3_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__3_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__3_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -20589,18 +20589,18 @@ define void @s_shuffle_v4f16_v4f16__4_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__4_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s8, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__4_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s8, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -20637,19 +20637,19 @@ define void @s_shuffle_v4f16_v4f16__5_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__5_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__5_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -20686,19 +20686,19 @@ define void @s_shuffle_v4f16_v4f16__6_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__6_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__6_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -20737,20 +20737,20 @@ define void @s_shuffle_v4f16_v4f16__7_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -20787,19 +20787,19 @@ define void @s_shuffle_v4f16_v4f16__7_u_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -20844,23 +20844,23 @@ define void @s_shuffle_v4f16_v4f16__7_0_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -20907,24 +20907,24 @@ define void @s_shuffle_v4f16_v4f16__7_1_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -20969,23 +20969,23 @@ define void @s_shuffle_v4f16_v4f16__7_2_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -21032,24 +21032,24 @@ define void @s_shuffle_v4f16_v4f16__7_3_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -21088,20 +21088,20 @@ define void @s_shuffle_v4f16_v4f16__7_4_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -21140,20 +21140,20 @@ define void @s_shuffle_v4f16_v4f16__7_6_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_6_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s1, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_6_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s1, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -21192,20 +21192,20 @@ define void @s_shuffle_v4f16_v4f16__7_7_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -21242,19 +21242,19 @@ define void @s_shuffle_v4f16_v4f16__7_7_u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -21299,23 +21299,23 @@ define void @s_shuffle_v4f16_v4f16__7_7_0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -21362,24 +21362,24 @@ define void @s_shuffle_v4f16_v4f16__7_7_1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -21424,23 +21424,23 @@ define void @s_shuffle_v4f16_v4f16__7_7_2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -21487,24 +21487,24 @@ define void @s_shuffle_v4f16_v4f16__7_7_3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -21541,19 +21541,19 @@ define void @s_shuffle_v4f16_v4f16__7_7_4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -21592,20 +21592,20 @@ define void @s_shuffle_v4f16_v4f16__7_7_6_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -21640,18 +21640,18 @@ define void @s_shuffle_v4f16_v4f16__u_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__u_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__u_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -21692,21 +21692,21 @@ define void @s_shuffle_v4f16_v4f16__0_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__0_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__0_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -21749,22 +21749,22 @@ define void @s_shuffle_v4f16_v4f16__1_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__1_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__1_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -21805,21 +21805,21 @@ define void @s_shuffle_v4f16_v4f16__2_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__2_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__2_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -21862,22 +21862,22 @@ define void @s_shuffle_v4f16_v4f16__3_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__3_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__3_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -21912,18 +21912,18 @@ define void @s_shuffle_v4f16_v4f16__4_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__4_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__4_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -21960,19 +21960,19 @@ define void @s_shuffle_v4f16_v4f16__5_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__5_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__5_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -22007,18 +22007,18 @@ define void @s_shuffle_v4f16_v4f16__6_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__6_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__6_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -22055,19 +22055,19 @@ define void @s_shuffle_v4f16_v4f16__7_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -22102,18 +22102,18 @@ define void @s_shuffle_v4f16_v4f16__7_u_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_u_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_u_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -22156,22 +22156,22 @@ define void @s_shuffle_v4f16_v4f16__7_0_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_0_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_0_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -22216,23 +22216,23 @@ define void @s_shuffle_v4f16_v4f16__7_1_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_1_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_1_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -22275,22 +22275,22 @@ define void @s_shuffle_v4f16_v4f16__7_2_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_2_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_2_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -22335,23 +22335,23 @@ define void @s_shuffle_v4f16_v4f16__7_3_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_3_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_3_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -22388,19 +22388,19 @@ define void @s_shuffle_v4f16_v4f16__7_4_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_4_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_4_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -22439,20 +22439,20 @@ define void @s_shuffle_v4f16_v4f16__7_5_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_5_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s2, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_5_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s2, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -22489,19 +22489,19 @@ define void @s_shuffle_v4f16_v4f16__7_7_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -22538,19 +22538,19 @@ define void @s_shuffle_v4f16_v4f16__7_7_u_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_lshl_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_lshl_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -22593,22 +22593,22 @@ define void @s_shuffle_v4f16_v4f16__7_7_0_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s3 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s3 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -22653,23 +22653,23 @@ define void @s_shuffle_v4f16_v4f16__7_7_1_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s3 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s3 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -22712,22 +22712,22 @@ define void @s_shuffle_v4f16_v4f16__7_7_2_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -22772,23 +22772,23 @@ define void @s_shuffle_v4f16_v4f16__7_7_3_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s3 -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s3 +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -22825,19 +22825,19 @@ define void @s_shuffle_v4f16_v4f16__7_7_4_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -22876,20 +22876,20 @@ define void @s_shuffle_v4f16_v4f16__7_7_5_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -22926,19 +22926,19 @@ define void @s_shuffle_v4f16_v4f16__u_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__u_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__u_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -22981,22 +22981,22 @@ define void @s_shuffle_v4f16_v4f16__0_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__0_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__0_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -23041,23 +23041,23 @@ define void @s_shuffle_v4f16_v4f16__1_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__1_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__1_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -23100,22 +23100,22 @@ define void @s_shuffle_v4f16_v4f16__2_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__2_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__2_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -23160,23 +23160,23 @@ define void @s_shuffle_v4f16_v4f16__3_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__3_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__3_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -23213,19 +23213,19 @@ define void @s_shuffle_v4f16_v4f16__4_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__4_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__4_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -23264,20 +23264,20 @@ define void @s_shuffle_v4f16_v4f16__5_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__5_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__5_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -23314,19 +23314,19 @@ define void @s_shuffle_v4f16_v4f16__6_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__6_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__6_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -23361,18 +23361,18 @@ define void @s_shuffle_v4f16_v4f16__7_u_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_u_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s8, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_u_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s8, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -23415,22 +23415,22 @@ define void @s_shuffle_v4f16_v4f16__7_0_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_0_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_0_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -23475,23 +23475,23 @@ define void @s_shuffle_v4f16_v4f16__7_1_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_1_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_1_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -23534,22 +23534,22 @@ define void @s_shuffle_v4f16_v4f16__7_2_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_2_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_2_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -23594,23 +23594,23 @@ define void @s_shuffle_v4f16_v4f16__7_3_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_3_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_3_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -23647,19 +23647,19 @@ define void @s_shuffle_v4f16_v4f16__7_4_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_4_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_4_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -23698,20 +23698,20 @@ define void @s_shuffle_v4f16_v4f16__7_5_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_5_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_5_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -23748,19 +23748,19 @@ define void @s_shuffle_v4f16_v4f16__7_6_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_6_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_6_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -23795,18 +23795,18 @@ define void @s_shuffle_v4f16_v4f16__7_7_u_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -23849,22 +23849,22 @@ define void @s_shuffle_v4f16_v4f16__7_7_0_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -23909,23 +23909,23 @@ define void @s_shuffle_v4f16_v4f16__7_7_1_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -23968,22 +23968,22 @@ define void @s_shuffle_v4f16_v4f16__7_7_2_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -24028,23 +24028,23 @@ define void @s_shuffle_v4f16_v4f16__7_7_3_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -24081,19 +24081,19 @@ define void @s_shuffle_v4f16_v4f16__7_7_4_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -24132,20 +24132,20 @@ define void @s_shuffle_v4f16_v4f16__7_7_5_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> @@ -24180,18 +24180,18 @@ define void @s_shuffle_v4f16_v4f16__7_7_6_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f16_v4f16__7_7_6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s9, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f16_v4f16__7_7_6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s9, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x half> asm "; def $0", "=s"() %vec1 = call <4 x half> asm "; def $0", "=s"() %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <4 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v2f32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v2f32.ll index 9e3c044a76295..df148f299a165 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v2f32.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v2f32.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v4f32_v2f32__u_u_u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v4f32_v2f32__0_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -79,17 +79,17 @@ define void @v_shuffle_v4f32_v2f32__1_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -132,17 +132,17 @@ define void @v_shuffle_v4f32_v2f32__3_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -181,21 +181,21 @@ define void @v_shuffle_v4f32_v2f32__3_0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -234,21 +234,21 @@ define void @v_shuffle_v4f32_v2f32__3_1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -282,19 +282,19 @@ define void @v_shuffle_v4f32_v2f32__3_2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -327,17 +327,17 @@ define void @v_shuffle_v4f32_v2f32__3_3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -376,20 +376,20 @@ define void @v_shuffle_v4f32_v2f32__3_3_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_3_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_3_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -429,21 +429,21 @@ define void @v_shuffle_v4f32_v2f32__3_3_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_3_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_3_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -478,19 +478,19 @@ define void @v_shuffle_v4f32_v2f32__3_3_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_3_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_3_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -525,18 +525,18 @@ define void @v_shuffle_v4f32_v2f32__3_3_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_3_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_3_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -577,21 +577,21 @@ define void @v_shuffle_v4f32_v2f32__3_3_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_3_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_3_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -632,21 +632,21 @@ define void @v_shuffle_v4f32_v2f32__3_3_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_3_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_3_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -682,18 +682,18 @@ define void @v_shuffle_v4f32_v2f32__3_3_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_3_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_3_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -730,19 +730,19 @@ define void @v_shuffle_v4f32_v2f32__3_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -777,18 +777,18 @@ define void @v_shuffle_v4f32_v2f32__u_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -824,19 +824,19 @@ define void @v_shuffle_v4f32_v2f32__0_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__0_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <4 x i32> zeroinitializer store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -871,18 +871,18 @@ define void @v_shuffle_v4f32_v2f32__1_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -916,18 +916,18 @@ define void @v_shuffle_v4f32_v2f32__2_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -968,21 +968,21 @@ define void @v_shuffle_v4f32_v2f32__3_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -1023,21 +1023,21 @@ define void @v_shuffle_v4f32_v2f32__3_u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -1080,22 +1080,22 @@ define void @v_shuffle_v4f32_v2f32__3_1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -1136,21 +1136,21 @@ define void @v_shuffle_v4f32_v2f32__3_2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -1191,21 +1191,21 @@ define void @v_shuffle_v4f32_v2f32__3_3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -1246,21 +1246,21 @@ define void @v_shuffle_v4f32_v2f32__3_3_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_3_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_3_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -1301,21 +1301,21 @@ define void @v_shuffle_v4f32_v2f32__3_3_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_3_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_3_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -1358,22 +1358,22 @@ define void @v_shuffle_v4f32_v2f32__3_3_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_3_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_3_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -1408,19 +1408,19 @@ define void @v_shuffle_v4f32_v2f32__u_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__u_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__u_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1454,19 +1454,19 @@ define void @v_shuffle_v4f32_v2f32__0_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__0_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__0_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1502,19 +1502,19 @@ define void @v_shuffle_v4f32_v2f32__1_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__1_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__1_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1548,19 +1548,19 @@ define void @v_shuffle_v4f32_v2f32__2_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__2_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__2_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1602,22 +1602,22 @@ define void @v_shuffle_v4f32_v2f32__3_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -1658,21 +1658,21 @@ define void @v_shuffle_v4f32_v2f32__3_u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -1713,21 +1713,21 @@ define void @v_shuffle_v4f32_v2f32__3_0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -1768,21 +1768,21 @@ define void @v_shuffle_v4f32_v2f32__3_2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -1823,21 +1823,21 @@ define void @v_shuffle_v4f32_v2f32__3_3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -1876,20 +1876,20 @@ define void @v_shuffle_v4f32_v2f32__3_3_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_3_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_3_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -1928,20 +1928,20 @@ define void @v_shuffle_v4f32_v2f32__3_3_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_3_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_3_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -1984,22 +1984,22 @@ define void @v_shuffle_v4f32_v2f32__3_3_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_3_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_3_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -2041,16 +2041,16 @@ define void @v_shuffle_v4f32_v2f32__0_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__0_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2082,17 +2082,17 @@ define void @v_shuffle_v4f32_v2f32__1_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__1_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2138,18 +2138,18 @@ define void @v_shuffle_v4f32_v2f32__3_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -2184,18 +2184,18 @@ define void @v_shuffle_v4f32_v2f32__3_u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -2236,21 +2236,21 @@ define void @v_shuffle_v4f32_v2f32__3_0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -2291,21 +2291,21 @@ define void @v_shuffle_v4f32_v2f32__3_1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -2342,19 +2342,19 @@ define void @v_shuffle_v4f32_v2f32__3_3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -2391,19 +2391,19 @@ define void @v_shuffle_v4f32_v2f32__3_3_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_3_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_3_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -2445,23 +2445,23 @@ define void @v_shuffle_v4f32_v2f32__3_3_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_3_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_3_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -2502,21 +2502,21 @@ define void @v_shuffle_v4f32_v2f32__3_3_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_3_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_3_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -2551,19 +2551,19 @@ define void @v_shuffle_v4f32_v2f32__u_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__u_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__u_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -2604,21 +2604,21 @@ define void @v_shuffle_v4f32_v2f32__0_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__0_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -2661,22 +2661,22 @@ define void @v_shuffle_v4f32_v2f32__1_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -2711,19 +2711,19 @@ define void @v_shuffle_v4f32_v2f32__2_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__2_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -2758,18 +2758,18 @@ define void @v_shuffle_v4f32_v2f32__3_u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -2810,21 +2810,21 @@ define void @v_shuffle_v4f32_v2f32__3_0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -2865,21 +2865,21 @@ define void @v_shuffle_v4f32_v2f32__3_1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -2914,18 +2914,18 @@ define void @v_shuffle_v4f32_v2f32__3_2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -2960,18 +2960,18 @@ define void @v_shuffle_v4f32_v2f32__3_3_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_3_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_3_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -3012,21 +3012,21 @@ define void @v_shuffle_v4f32_v2f32__3_3_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_3_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_3_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -3068,22 +3068,22 @@ define void @v_shuffle_v4f32_v2f32__3_3_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_3_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_3_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -3118,19 +3118,19 @@ define void @v_shuffle_v4f32_v2f32__3_3_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v2f32__3_3_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v2f32__3_3_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=v"() %vec1 = call <2 x float> asm "; def $0", "=v"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -3175,17 +3175,17 @@ define void @s_shuffle_v4f32_v2f32__0_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v2f32__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v2f32__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x float> %shuf) @@ -3217,17 +3217,17 @@ define void @s_shuffle_v4f32_v2f32__1_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v2f32__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v2f32__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x float> %shuf) @@ -3273,17 +3273,17 @@ define void @s_shuffle_v4f32_v2f32__3_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v2f32__3_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v2f32__3_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -3324,21 +3324,21 @@ define void @s_shuffle_v4f32_v2f32__3_0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v2f32__3_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v2f32__3_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -3377,20 +3377,20 @@ define void @s_shuffle_v4f32_v2f32__3_1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v2f32__3_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v2f32__3_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -3425,18 +3425,18 @@ define void @s_shuffle_v4f32_v2f32__3_2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v2f32__3_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v2f32__3_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -3518,21 +3518,21 @@ define void @s_shuffle_v4f32_v2f32__3_3_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v2f32__3_3_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s9 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v2f32__3_3_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s9 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -3615,22 +3615,22 @@ define void @s_shuffle_v4f32_v2f32__3_3_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v2f32__3_3_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s9 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v2f32__3_3_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s9 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -3692,20 +3692,20 @@ define void @s_shuffle_v4f32_v2f32__3_3_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v2f32__3_3_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v2f32__3_3_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -3847,22 +3847,22 @@ define void @s_shuffle_v4f32_v2f32__3_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v2f32__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[10:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v2f32__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[10:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -3903,21 +3903,21 @@ define void @s_shuffle_v4f32_v2f32__3_u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v2f32__3_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[10:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v2f32__3_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[10:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -3960,22 +3960,22 @@ define void @s_shuffle_v4f32_v2f32__3_1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v2f32__3_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[10:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v2f32__3_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[10:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -4018,22 +4018,22 @@ define void @s_shuffle_v4f32_v2f32__3_2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v2f32__3_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[10:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v2f32__3_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[10:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -4097,21 +4097,21 @@ define void @s_shuffle_v4f32_v2f32__3_3_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v2f32__3_3_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s9 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v2f32__3_3_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s9 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -4154,22 +4154,22 @@ define void @s_shuffle_v4f32_v2f32__3_3_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v2f32__3_3_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s9 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v2f32__3_3_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s9 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -4212,22 +4212,22 @@ define void @s_shuffle_v4f32_v2f32__3_3_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v2f32__3_3_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[10:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v2f32__3_3_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[10:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -4347,22 +4347,22 @@ define void @s_shuffle_v4f32_v2f32__3_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v2f32__3_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v2f32__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -4403,21 +4403,21 @@ define void @s_shuffle_v4f32_v2f32__3_u_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v2f32__3_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[10:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v2f32__3_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[10:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -4460,22 +4460,22 @@ define void @s_shuffle_v4f32_v2f32__3_0_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v2f32__3_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[10:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v2f32__3_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[10:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -4518,22 +4518,22 @@ define void @s_shuffle_v4f32_v2f32__3_2_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v2f32__3_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[10:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v2f32__3_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[10:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -4643,22 +4643,22 @@ define void @s_shuffle_v4f32_v2f32__3_3_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v2f32__3_3_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[10:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v2f32__3_3_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[10:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -4703,17 +4703,17 @@ define void @s_shuffle_v4f32_v2f32__0_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v2f32__0_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v2f32__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x float> %shuf) @@ -4745,17 +4745,17 @@ define void @s_shuffle_v4f32_v2f32__1_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v2f32__1_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v2f32__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x float> %shuf) @@ -4852,22 +4852,22 @@ define void @s_shuffle_v4f32_v2f32__3_0_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v2f32__3_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[10:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v2f32__3_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[10:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -4948,19 +4948,19 @@ define void @s_shuffle_v4f32_v2f32__3_3_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v2f32__3_3_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v2f32__3_3_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -5003,22 +5003,22 @@ define void @s_shuffle_v4f32_v2f32__3_3_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v2f32__3_3_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[10:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v2f32__3_3_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[10:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -5063,23 +5063,23 @@ define void @s_shuffle_v4f32_v2f32__3_3_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v2f32__3_3_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v2f32__3_3_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -5165,22 +5165,22 @@ define void @s_shuffle_v4f32_v2f32__1_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v2f32__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v2f32__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -5263,22 +5263,22 @@ define void @s_shuffle_v4f32_v2f32__3_0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v2f32__3_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[10:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v2f32__3_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[10:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> @@ -5408,22 +5408,22 @@ define void @s_shuffle_v4f32_v2f32__3_3_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v2f32__3_3_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s9 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v2f32__3_3_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s9 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x float> asm "; def $0", "=s"() %vec1 = call <2 x float> asm "; def $0", "=s"() %shuf = shufflevector <2 x float> %vec0, <2 x float> %vec1, <4 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v3f32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v3f32.ll index 31c458f5338cb..d4ee6fa20cad8 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v3f32.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v3f32.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v4f32_v3f32__u_u_u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v4f32_v3f32__0_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -79,17 +79,17 @@ define void @v_shuffle_v4f32_v3f32__1_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -121,17 +121,17 @@ define void @v_shuffle_v4f32_v3f32__2_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__2_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__2_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -174,17 +174,17 @@ define void @v_shuffle_v4f32_v3f32__4_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__4_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__4_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -217,17 +217,17 @@ define void @v_shuffle_v4f32_v3f32__5_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -267,21 +267,21 @@ define void @v_shuffle_v4f32_v3f32__5_0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -320,21 +320,21 @@ define void @v_shuffle_v4f32_v3f32__5_1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -375,21 +375,21 @@ define void @v_shuffle_v4f32_v3f32__5_2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -423,19 +423,19 @@ define void @v_shuffle_v4f32_v3f32__5_3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -468,17 +468,17 @@ define void @v_shuffle_v4f32_v3f32__5_4_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -513,18 +513,18 @@ define void @v_shuffle_v4f32_v3f32__5_5_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -565,22 +565,22 @@ define void @v_shuffle_v4f32_v3f32__5_5_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -622,23 +622,23 @@ define void @v_shuffle_v4f32_v3f32__5_5_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -679,22 +679,22 @@ define void @v_shuffle_v4f32_v3f32__5_5_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -729,19 +729,19 @@ define void @v_shuffle_v4f32_v3f32__5_5_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -777,19 +777,19 @@ define void @v_shuffle_v4f32_v3f32__5_5_4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -824,18 +824,18 @@ define void @v_shuffle_v4f32_v3f32__5_5_5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -877,22 +877,22 @@ define void @v_shuffle_v4f32_v3f32__5_5_5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -933,22 +933,22 @@ define void @v_shuffle_v4f32_v3f32__5_5_5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -990,22 +990,22 @@ define void @v_shuffle_v4f32_v3f32__5_5_5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -1043,21 +1043,21 @@ define void @v_shuffle_v4f32_v3f32__5_5_5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -1094,19 +1094,19 @@ define void @v_shuffle_v4f32_v3f32__5_5_5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -1143,19 +1143,19 @@ define void @v_shuffle_v4f32_v3f32__5_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -1191,19 +1191,19 @@ define void @v_shuffle_v4f32_v3f32__u_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1239,19 +1239,19 @@ define void @v_shuffle_v4f32_v3f32__0_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__0_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <4 x i32> zeroinitializer store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1287,20 +1287,20 @@ define void @v_shuffle_v4f32_v3f32__1_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1337,21 +1337,21 @@ define void @v_shuffle_v4f32_v3f32__2_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1386,19 +1386,19 @@ define void @v_shuffle_v4f32_v3f32__3_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1440,23 +1440,23 @@ define void @v_shuffle_v4f32_v3f32__4_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__4_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__4_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -1500,23 +1500,23 @@ define void @v_shuffle_v4f32_v3f32__5_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -1559,22 +1559,22 @@ define void @v_shuffle_v4f32_v3f32__5_u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -1618,23 +1618,23 @@ define void @v_shuffle_v4f32_v3f32__5_1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -1678,23 +1678,23 @@ define void @v_shuffle_v4f32_v3f32__5_2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -1738,23 +1738,23 @@ define void @v_shuffle_v4f32_v3f32__5_3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v9, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:8] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v9, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v9, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:8] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v9, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -1796,22 +1796,22 @@ define void @v_shuffle_v4f32_v3f32__5_4_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -1855,23 +1855,23 @@ define void @v_shuffle_v4f32_v3f32__5_5_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -1914,22 +1914,22 @@ define void @v_shuffle_v4f32_v3f32__5_5_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -1972,23 +1972,23 @@ define void @v_shuffle_v4f32_v3f32__5_5_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -2032,23 +2032,23 @@ define void @v_shuffle_v4f32_v3f32__5_5_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -2092,23 +2092,23 @@ define void @v_shuffle_v4f32_v3f32__5_5_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v9, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:8] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v8 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: global_store_dwordx4 v9, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v9, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:8] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v8 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: global_store_dwordx4 v9, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -2151,23 +2151,23 @@ define void @v_shuffle_v4f32_v3f32__5_5_4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -2202,18 +2202,18 @@ define void @v_shuffle_v4f32_v3f32__u_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__u_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__u_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2247,18 +2247,18 @@ define void @v_shuffle_v4f32_v3f32__0_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__0_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__0_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2294,19 +2294,19 @@ define void @v_shuffle_v4f32_v3f32__1_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__1_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__1_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2342,19 +2342,19 @@ define void @v_shuffle_v4f32_v3f32__2_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__2_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__2_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2388,18 +2388,18 @@ define void @v_shuffle_v4f32_v3f32__3_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__3_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2441,23 +2441,23 @@ define void @v_shuffle_v4f32_v3f32__4_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__4_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__4_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -2500,23 +2500,23 @@ define void @v_shuffle_v4f32_v3f32__5_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -2558,23 +2558,23 @@ define void @v_shuffle_v4f32_v3f32__5_u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -2616,22 +2616,22 @@ define void @v_shuffle_v4f32_v3f32__5_0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -2674,23 +2674,23 @@ define void @v_shuffle_v4f32_v3f32__5_2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -2733,22 +2733,22 @@ define void @v_shuffle_v4f32_v3f32__5_3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -2789,22 +2789,22 @@ define void @v_shuffle_v4f32_v3f32__5_4_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -2847,23 +2847,23 @@ define void @v_shuffle_v4f32_v3f32__5_5_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -2905,22 +2905,22 @@ define void @v_shuffle_v4f32_v3f32__5_5_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -2961,22 +2961,22 @@ define void @v_shuffle_v4f32_v3f32__5_5_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -3019,23 +3019,23 @@ define void @v_shuffle_v4f32_v3f32__5_5_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -3078,23 +3078,23 @@ define void @v_shuffle_v4f32_v3f32__5_5_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -3137,23 +3137,23 @@ define void @v_shuffle_v4f32_v3f32__5_5_4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -3188,18 +3188,18 @@ define void @v_shuffle_v4f32_v3f32__u_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__u_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__u_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3233,18 +3233,18 @@ define void @v_shuffle_v4f32_v3f32__0_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__0_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3280,19 +3280,19 @@ define void @v_shuffle_v4f32_v3f32__1_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__1_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3328,19 +3328,19 @@ define void @v_shuffle_v4f32_v3f32__2_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__2_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__2_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3374,18 +3374,18 @@ define void @v_shuffle_v4f32_v3f32__3_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__3_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__3_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3427,22 +3427,22 @@ define void @v_shuffle_v4f32_v3f32__4_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__4_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__4_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -3485,22 +3485,22 @@ define void @v_shuffle_v4f32_v3f32__5_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -3541,21 +3541,21 @@ define void @v_shuffle_v4f32_v3f32__5_u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -3598,23 +3598,23 @@ define void @v_shuffle_v4f32_v3f32__5_0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:8] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:8] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -3655,21 +3655,21 @@ define void @v_shuffle_v4f32_v3f32__5_1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -3712,22 +3712,22 @@ define void @v_shuffle_v4f32_v3f32__5_3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -3769,22 +3769,22 @@ define void @v_shuffle_v4f32_v3f32__5_4_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -3827,22 +3827,22 @@ define void @v_shuffle_v4f32_v3f32__5_5_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -3884,22 +3884,22 @@ define void @v_shuffle_v4f32_v3f32__5_5_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -3942,22 +3942,22 @@ define void @v_shuffle_v4f32_v3f32__5_5_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:8] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v8 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:8] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v8 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -4000,23 +4000,23 @@ define void @v_shuffle_v4f32_v3f32__5_5_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -4060,23 +4060,23 @@ define void @v_shuffle_v4f32_v3f32__5_5_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:8] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v8 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:8] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v8 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -4120,24 +4120,24 @@ define void @v_shuffle_v4f32_v3f32__5_5_4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -4179,16 +4179,16 @@ define void @v_shuffle_v4f32_v3f32__0_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__0_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4220,17 +4220,17 @@ define void @v_shuffle_v4f32_v3f32__1_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4262,17 +4262,17 @@ define void @v_shuffle_v4f32_v3f32__2_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__2_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4319,20 +4319,20 @@ define void @v_shuffle_v4f32_v3f32__4_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__4_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__4_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -4370,21 +4370,21 @@ define void @v_shuffle_v4f32_v3f32__5_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -4421,20 +4421,20 @@ define void @v_shuffle_v4f32_v3f32__5_u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -4477,23 +4477,23 @@ define void @v_shuffle_v4f32_v3f32__5_0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -4535,23 +4535,23 @@ define void @v_shuffle_v4f32_v3f32__5_1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -4595,23 +4595,23 @@ define void @v_shuffle_v4f32_v3f32__5_2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -4649,21 +4649,21 @@ define void @v_shuffle_v4f32_v3f32__5_4_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -4701,21 +4701,21 @@ define void @v_shuffle_v4f32_v3f32__5_5_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -4752,19 +4752,19 @@ define void @v_shuffle_v4f32_v3f32__5_5_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -4806,23 +4806,23 @@ define void @v_shuffle_v4f32_v3f32__5_5_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -4864,23 +4864,23 @@ define void @v_shuffle_v4f32_v3f32__5_5_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -4922,23 +4922,23 @@ define void @v_shuffle_v4f32_v3f32__5_5_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -4975,19 +4975,19 @@ define void @v_shuffle_v4f32_v3f32__5_5_4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -5022,18 +5022,18 @@ define void @v_shuffle_v4f32_v3f32__u_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__u_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__u_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -5074,22 +5074,22 @@ define void @v_shuffle_v4f32_v3f32__0_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__0_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__0_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -5132,23 +5132,23 @@ define void @v_shuffle_v4f32_v3f32__1_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__1_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__1_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -5191,23 +5191,23 @@ define void @v_shuffle_v4f32_v3f32__2_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__2_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__2_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -5242,18 +5242,18 @@ define void @v_shuffle_v4f32_v3f32__3_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__3_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__3_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -5290,19 +5290,19 @@ define void @v_shuffle_v4f32_v3f32__4_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__4_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__4_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -5339,19 +5339,19 @@ define void @v_shuffle_v4f32_v3f32__5_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -5387,19 +5387,19 @@ define void @v_shuffle_v4f32_v3f32__5_u_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -5442,23 +5442,23 @@ define void @v_shuffle_v4f32_v3f32__5_0_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -5499,22 +5499,22 @@ define void @v_shuffle_v4f32_v3f32__5_1_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -5558,23 +5558,23 @@ define void @v_shuffle_v4f32_v3f32__5_2_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -5610,19 +5610,19 @@ define void @v_shuffle_v4f32_v3f32__5_3_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -5659,19 +5659,19 @@ define void @v_shuffle_v4f32_v3f32__5_5_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -5707,19 +5707,19 @@ define void @v_shuffle_v4f32_v3f32__5_5_u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -5762,23 +5762,23 @@ define void @v_shuffle_v4f32_v3f32__5_5_0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -5822,23 +5822,23 @@ define void @v_shuffle_v4f32_v3f32__5_5_1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -5881,23 +5881,23 @@ define void @v_shuffle_v4f32_v3f32__5_5_2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -5932,19 +5932,19 @@ define void @v_shuffle_v4f32_v3f32__5_5_3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -5979,18 +5979,18 @@ define void @v_shuffle_v4f32_v3f32__u_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__u_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__u_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -6032,23 +6032,23 @@ define void @v_shuffle_v4f32_v3f32__0_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__0_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__0_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -6091,23 +6091,23 @@ define void @v_shuffle_v4f32_v3f32__1_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__1_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__1_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -6150,23 +6150,23 @@ define void @v_shuffle_v4f32_v3f32__2_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__2_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__2_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -6201,18 +6201,18 @@ define void @v_shuffle_v4f32_v3f32__3_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__3_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__3_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -6249,19 +6249,19 @@ define void @v_shuffle_v4f32_v3f32__4_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__4_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__4_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -6296,18 +6296,18 @@ define void @v_shuffle_v4f32_v3f32__5_u_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -6350,22 +6350,22 @@ define void @v_shuffle_v4f32_v3f32__5_0_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -6408,23 +6408,23 @@ define void @v_shuffle_v4f32_v3f32__5_1_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -6467,23 +6467,23 @@ define void @v_shuffle_v4f32_v3f32__5_2_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -6520,20 +6520,20 @@ define void @v_shuffle_v4f32_v3f32__5_3_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -6568,18 +6568,18 @@ define void @v_shuffle_v4f32_v3f32__5_4_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -6615,19 +6615,19 @@ define void @v_shuffle_v4f32_v3f32__5_5_u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -6670,23 +6670,23 @@ define void @v_shuffle_v4f32_v3f32__5_5_0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v6 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v6 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -6729,23 +6729,23 @@ define void @v_shuffle_v4f32_v3f32__5_5_1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -6788,23 +6788,23 @@ define void @v_shuffle_v4f32_v3f32__5_5_2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v6 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v6 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -6841,19 +6841,19 @@ define void @v_shuffle_v4f32_v3f32__5_5_3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -6889,19 +6889,19 @@ define void @v_shuffle_v4f32_v3f32__5_5_4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v3f32__5_5_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=v"() %vec1 = call <3 x float> asm "; def $0", "=v"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -6946,17 +6946,17 @@ define void @s_shuffle_v4f32_v3f32__0_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x float> %shuf) @@ -6988,17 +6988,17 @@ define void @s_shuffle_v4f32_v3f32__1_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x float> %shuf) @@ -7030,17 +7030,17 @@ define void @s_shuffle_v4f32_v3f32__2_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__2_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__2_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x float> %shuf) @@ -7086,17 +7086,17 @@ define void @s_shuffle_v4f32_v3f32__4_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__4_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__4_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -7129,17 +7129,17 @@ define void @s_shuffle_v4f32_v3f32__5_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -7180,21 +7180,21 @@ define void @s_shuffle_v4f32_v3f32__5_0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -7233,20 +7233,20 @@ define void @s_shuffle_v4f32_v3f32__5_1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -7287,21 +7287,21 @@ define void @s_shuffle_v4f32_v3f32__5_2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -7336,18 +7336,18 @@ define void @s_shuffle_v4f32_v3f32__5_3_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -7401,18 +7401,18 @@ define void @s_shuffle_v4f32_v3f32__5_5_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -7455,22 +7455,22 @@ define void @s_shuffle_v4f32_v3f32__5_5_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -7513,22 +7513,22 @@ define void @s_shuffle_v4f32_v3f32__5_5_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -7569,21 +7569,21 @@ define void @s_shuffle_v4f32_v3f32__5_5_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -7620,19 +7620,19 @@ define void @s_shuffle_v4f32_v3f32__5_5_3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -7669,19 +7669,19 @@ define void @s_shuffle_v4f32_v3f32__5_5_4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -7744,22 +7744,22 @@ define void @s_shuffle_v4f32_v3f32__5_5_5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -7802,22 +7802,22 @@ define void @s_shuffle_v4f32_v3f32__5_5_5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -7860,22 +7860,22 @@ define void @s_shuffle_v4f32_v3f32__5_5_5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -7914,20 +7914,20 @@ define void @s_shuffle_v4f32_v3f32__5_5_5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -7966,20 +7966,20 @@ define void @s_shuffle_v4f32_v3f32__5_5_5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -8037,19 +8037,19 @@ define void @s_shuffle_v4f32_v3f32__u_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x float> %shuf) @@ -8107,20 +8107,20 @@ define void @s_shuffle_v4f32_v3f32__1_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x float> %shuf) @@ -8158,20 +8158,20 @@ define void @s_shuffle_v4f32_v3f32__2_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x float> %shuf) @@ -8207,19 +8207,19 @@ define void @s_shuffle_v4f32_v3f32__3_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x float> %shuf) @@ -8263,23 +8263,23 @@ define void @s_shuffle_v4f32_v3f32__4_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__4_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__4_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -8324,23 +8324,23 @@ define void @s_shuffle_v4f32_v3f32__5_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -8383,22 +8383,22 @@ define void @s_shuffle_v4f32_v3f32__5_u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -8443,23 +8443,23 @@ define void @s_shuffle_v4f32_v3f32__5_1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -8504,23 +8504,23 @@ define void @s_shuffle_v4f32_v3f32__5_2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -8565,23 +8565,23 @@ define void @s_shuffle_v4f32_v3f32__5_3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -8624,22 +8624,22 @@ define void @s_shuffle_v4f32_v3f32__5_4_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -8684,23 +8684,23 @@ define void @s_shuffle_v4f32_v3f32__5_5_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -8743,22 +8743,22 @@ define void @s_shuffle_v4f32_v3f32__5_5_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -8803,23 +8803,23 @@ define void @s_shuffle_v4f32_v3f32__5_5_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -8864,23 +8864,23 @@ define void @s_shuffle_v4f32_v3f32__5_5_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -8925,23 +8925,23 @@ define void @s_shuffle_v4f32_v3f32__5_5_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -8986,23 +8986,23 @@ define void @s_shuffle_v4f32_v3f32__5_5_4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -9142,22 +9142,22 @@ define void @s_shuffle_v4f32_v3f32__4_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__4_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__4_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -9200,22 +9200,22 @@ define void @s_shuffle_v4f32_v3f32__5_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -9258,22 +9258,22 @@ define void @s_shuffle_v4f32_v3f32__5_u_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -9318,23 +9318,23 @@ define void @s_shuffle_v4f32_v3f32__5_0_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -9379,23 +9379,23 @@ define void @s_shuffle_v4f32_v3f32__5_2_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -9440,23 +9440,23 @@ define void @s_shuffle_v4f32_v3f32__5_3_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -9499,22 +9499,22 @@ define void @s_shuffle_v4f32_v3f32__5_4_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -9559,23 +9559,23 @@ define void @s_shuffle_v4f32_v3f32__5_5_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -9618,22 +9618,22 @@ define void @s_shuffle_v4f32_v3f32__5_5_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -9678,23 +9678,23 @@ define void @s_shuffle_v4f32_v3f32__5_5_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -9739,23 +9739,23 @@ define void @s_shuffle_v4f32_v3f32__5_5_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -9800,23 +9800,23 @@ define void @s_shuffle_v4f32_v3f32__5_5_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -9861,23 +9861,23 @@ define void @s_shuffle_v4f32_v3f32__5_5_4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -10017,22 +10017,22 @@ define void @s_shuffle_v4f32_v3f32__4_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__4_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__4_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -10075,22 +10075,22 @@ define void @s_shuffle_v4f32_v3f32__5_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -10131,21 +10131,21 @@ define void @s_shuffle_v4f32_v3f32__5_u_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -10190,23 +10190,23 @@ define void @s_shuffle_v4f32_v3f32__5_0_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -10247,21 +10247,21 @@ define void @s_shuffle_v4f32_v3f32__5_1_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -10304,22 +10304,22 @@ define void @s_shuffle_v4f32_v3f32__5_3_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -10362,22 +10362,22 @@ define void @s_shuffle_v4f32_v3f32__5_4_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -10420,22 +10420,22 @@ define void @s_shuffle_v4f32_v3f32__5_5_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -10478,22 +10478,22 @@ define void @s_shuffle_v4f32_v3f32__5_5_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -10538,23 +10538,23 @@ define void @s_shuffle_v4f32_v3f32__5_5_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -10599,23 +10599,23 @@ define void @s_shuffle_v4f32_v3f32__5_5_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -10660,23 +10660,23 @@ define void @s_shuffle_v4f32_v3f32__5_5_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -10721,23 +10721,23 @@ define void @s_shuffle_v4f32_v3f32__5_5_4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -10782,17 +10782,17 @@ define void @s_shuffle_v4f32_v3f32__0_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__0_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x float> %shuf) @@ -10824,17 +10824,17 @@ define void @s_shuffle_v4f32_v3f32__1_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x float> %shuf) @@ -10866,17 +10866,17 @@ define void @s_shuffle_v4f32_v3f32__2_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__2_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x float> %shuf) @@ -10928,20 +10928,20 @@ define void @s_shuffle_v4f32_v3f32__4_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__4_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__4_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -10980,20 +10980,20 @@ define void @s_shuffle_v4f32_v3f32__5_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -11030,19 +11030,19 @@ define void @s_shuffle_v4f32_v3f32__5_u_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -11087,23 +11087,23 @@ define void @s_shuffle_v4f32_v3f32__5_0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -11146,22 +11146,22 @@ define void @s_shuffle_v4f32_v3f32__5_1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -11206,23 +11206,23 @@ define void @s_shuffle_v4f32_v3f32__5_2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -11261,20 +11261,20 @@ define void @s_shuffle_v4f32_v3f32__5_4_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -11313,20 +11313,20 @@ define void @s_shuffle_v4f32_v3f32__5_5_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -11363,19 +11363,19 @@ define void @s_shuffle_v4f32_v3f32__5_5_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -11420,23 +11420,23 @@ define void @s_shuffle_v4f32_v3f32__5_5_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -11481,23 +11481,23 @@ define void @s_shuffle_v4f32_v3f32__5_5_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -11540,22 +11540,22 @@ define void @s_shuffle_v4f32_v3f32__5_5_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -11594,20 +11594,20 @@ define void @s_shuffle_v4f32_v3f32__5_5_4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -11670,22 +11670,22 @@ define void @s_shuffle_v4f32_v3f32__0_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__0_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__0_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -11728,22 +11728,22 @@ define void @s_shuffle_v4f32_v3f32__1_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__1_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__1_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -11786,22 +11786,22 @@ define void @s_shuffle_v4f32_v3f32__2_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__2_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__2_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -11900,19 +11900,19 @@ define void @s_shuffle_v4f32_v3f32__5_u_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -11957,23 +11957,23 @@ define void @s_shuffle_v4f32_v3f32__5_0_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -12016,22 +12016,22 @@ define void @s_shuffle_v4f32_v3f32__5_1_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -12076,23 +12076,23 @@ define void @s_shuffle_v4f32_v3f32__5_2_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -12131,20 +12131,20 @@ define void @s_shuffle_v4f32_v3f32__5_3_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -12183,20 +12183,20 @@ define void @s_shuffle_v4f32_v3f32__5_5_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -12233,19 +12233,19 @@ define void @s_shuffle_v4f32_v3f32__5_5_u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -12290,23 +12290,23 @@ define void @s_shuffle_v4f32_v3f32__5_5_0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -12351,23 +12351,23 @@ define void @s_shuffle_v4f32_v3f32__5_5_1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -12410,22 +12410,22 @@ define void @s_shuffle_v4f32_v3f32__5_5_2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -12464,20 +12464,20 @@ define void @s_shuffle_v4f32_v3f32__5_5_3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -12540,22 +12540,22 @@ define void @s_shuffle_v4f32_v3f32__0_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__0_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__0_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -12598,22 +12598,22 @@ define void @s_shuffle_v4f32_v3f32__1_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__1_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__1_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -12656,22 +12656,22 @@ define void @s_shuffle_v4f32_v3f32__2_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__2_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__2_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -12775,22 +12775,22 @@ define void @s_shuffle_v4f32_v3f32__5_0_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -12833,22 +12833,22 @@ define void @s_shuffle_v4f32_v3f32__5_1_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -12891,22 +12891,22 @@ define void @s_shuffle_v4f32_v3f32__5_2_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -12945,20 +12945,20 @@ define void @s_shuffle_v4f32_v3f32__5_3_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -13015,19 +13015,19 @@ define void @s_shuffle_v4f32_v3f32__5_5_u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -13072,23 +13072,23 @@ define void @s_shuffle_v4f32_v3f32__5_5_0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -13133,23 +13133,23 @@ define void @s_shuffle_v4f32_v3f32__5_5_1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -13192,22 +13192,22 @@ define void @s_shuffle_v4f32_v3f32__5_5_2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -13246,20 +13246,20 @@ define void @s_shuffle_v4f32_v3f32__5_5_3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> @@ -13298,20 +13298,20 @@ define void @s_shuffle_v4f32_v3f32__5_5_4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v3f32__5_5_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v3f32__5_5_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x float> asm "; def $0", "=s"() %vec1 = call <3 x float> asm "; def $0", "=s"() %shuf = shufflevector <3 x float> %vec0, <3 x float> %vec1, <4 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v4f32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v4f32.ll index e3427cd35c683..edc540edb3ad1 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v4f32.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v4f32.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v4f32_v4f32__u_u_u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v4f32_v4f32__0_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -79,17 +79,17 @@ define void @v_shuffle_v4f32_v4f32__1_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -121,17 +121,17 @@ define void @v_shuffle_v4f32_v4f32__2_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__2_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__2_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -163,17 +163,17 @@ define void @v_shuffle_v4f32_v4f32__3_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__3_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__3_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -216,17 +216,17 @@ define void @v_shuffle_v4f32_v4f32__5_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__5_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__5_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -259,17 +259,17 @@ define void @v_shuffle_v4f32_v4f32__6_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__6_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__6_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -302,17 +302,17 @@ define void @v_shuffle_v4f32_v4f32__7_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -352,21 +352,21 @@ define void @v_shuffle_v4f32_v4f32__7_0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -405,21 +405,21 @@ define void @v_shuffle_v4f32_v4f32__7_1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -459,21 +459,21 @@ define void @v_shuffle_v4f32_v4f32__7_2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -514,21 +514,21 @@ define void @v_shuffle_v4f32_v4f32__7_3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -562,17 +562,17 @@ define void @v_shuffle_v4f32_v4f32__7_4_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -605,17 +605,17 @@ define void @v_shuffle_v4f32_v4f32__7_5_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -649,17 +649,17 @@ define void @v_shuffle_v4f32_v4f32__7_6_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_6_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_6_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -694,18 +694,18 @@ define void @v_shuffle_v4f32_v4f32__7_7_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -748,23 +748,23 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -807,23 +807,23 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -864,22 +864,22 @@ define void @v_shuffle_v4f32_v4f32__7_7_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -922,22 +922,22 @@ define void @v_shuffle_v4f32_v4f32__7_7_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -973,18 +973,18 @@ define void @v_shuffle_v4f32_v4f32__7_7_4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -1019,18 +1019,18 @@ define void @v_shuffle_v4f32_v4f32__7_7_5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -1065,18 +1065,18 @@ define void @v_shuffle_v4f32_v4f32__7_7_6_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -1113,19 +1113,19 @@ define void @v_shuffle_v4f32_v4f32__7_7_7_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -1169,23 +1169,23 @@ define void @v_shuffle_v4f32_v4f32__7_7_7_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -1230,24 +1230,24 @@ define void @v_shuffle_v4f32_v4f32__7_7_7_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -1291,23 +1291,23 @@ define void @v_shuffle_v4f32_v4f32__7_7_7_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[6:7], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[6:7], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -1350,23 +1350,23 @@ define void @v_shuffle_v4f32_v4f32__7_7_7_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -1402,18 +1402,18 @@ define void @v_shuffle_v4f32_v4f32__7_7_7_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -1452,21 +1452,21 @@ define void @v_shuffle_v4f32_v4f32__7_7_7_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -1503,18 +1503,18 @@ define void @v_shuffle_v4f32_v4f32__7_7_7_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -1551,19 +1551,19 @@ define void @v_shuffle_v4f32_v4f32__7_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -1600,19 +1600,19 @@ define void @v_shuffle_v4f32_v4f32__u_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1648,19 +1648,19 @@ define void @v_shuffle_v4f32_v4f32__0_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__0_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> zeroinitializer store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1696,19 +1696,19 @@ define void @v_shuffle_v4f32_v4f32__1_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1745,19 +1745,19 @@ define void @v_shuffle_v4f32_v4f32__2_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1794,19 +1794,19 @@ define void @v_shuffle_v4f32_v4f32__3_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1842,19 +1842,19 @@ define void @v_shuffle_v4f32_v4f32__4_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__4_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__4_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1897,23 +1897,23 @@ define void @v_shuffle_v4f32_v4f32__5_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__5_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__5_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -1958,24 +1958,24 @@ define void @v_shuffle_v4f32_v4f32__6_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__6_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__6_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -2019,23 +2019,23 @@ define void @v_shuffle_v4f32_v4f32__7_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -2078,23 +2078,23 @@ define void @v_shuffle_v4f32_v4f32__7_u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -2139,24 +2139,24 @@ define void @v_shuffle_v4f32_v4f32__7_1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -2199,22 +2199,22 @@ define void @v_shuffle_v4f32_v4f32__7_2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[6:7], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[6:7], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -2258,22 +2258,22 @@ define void @v_shuffle_v4f32_v4f32__7_3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -2317,23 +2317,23 @@ define void @v_shuffle_v4f32_v4f32__7_4_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -2376,23 +2376,23 @@ define void @v_shuffle_v4f32_v4f32__7_5_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -2436,23 +2436,23 @@ define void @v_shuffle_v4f32_v4f32__7_6_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_6_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[4:5] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_6_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[4:5] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -2497,24 +2497,24 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -2557,23 +2557,23 @@ define void @v_shuffle_v4f32_v4f32__7_7_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -2617,23 +2617,23 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -2678,23 +2678,23 @@ define void @v_shuffle_v4f32_v4f32__7_7_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v0 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v0 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -2737,22 +2737,22 @@ define void @v_shuffle_v4f32_v4f32__7_7_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -2796,22 +2796,22 @@ define void @v_shuffle_v4f32_v4f32__7_7_4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -2854,23 +2854,23 @@ define void @v_shuffle_v4f32_v4f32__7_7_5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -2913,23 +2913,23 @@ define void @v_shuffle_v4f32_v4f32__7_7_6_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -2964,18 +2964,18 @@ define void @v_shuffle_v4f32_v4f32__u_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__u_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__u_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3009,18 +3009,18 @@ define void @v_shuffle_v4f32_v4f32__0_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__0_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__0_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3056,19 +3056,19 @@ define void @v_shuffle_v4f32_v4f32__1_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__1_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__1_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3104,19 +3104,19 @@ define void @v_shuffle_v4f32_v4f32__2_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__2_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__2_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3152,19 +3152,19 @@ define void @v_shuffle_v4f32_v4f32__3_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__3_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3198,18 +3198,18 @@ define void @v_shuffle_v4f32_v4f32__4_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__4_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__4_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3251,23 +3251,23 @@ define void @v_shuffle_v4f32_v4f32__5_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__5_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__5_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -3310,23 +3310,23 @@ define void @v_shuffle_v4f32_v4f32__6_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__6_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__6_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -3369,23 +3369,23 @@ define void @v_shuffle_v4f32_v4f32__7_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -3428,23 +3428,23 @@ define void @v_shuffle_v4f32_v4f32__7_u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -3488,23 +3488,23 @@ define void @v_shuffle_v4f32_v4f32__7_0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -3548,22 +3548,22 @@ define void @v_shuffle_v4f32_v4f32__7_2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[6:7], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[6:7], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -3606,22 +3606,22 @@ define void @v_shuffle_v4f32_v4f32__7_3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -3665,23 +3665,23 @@ define void @v_shuffle_v4f32_v4f32__7_4_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -3724,23 +3724,23 @@ define void @v_shuffle_v4f32_v4f32__7_5_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -3784,23 +3784,23 @@ define void @v_shuffle_v4f32_v4f32__7_6_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_6_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[4:5] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_6_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[4:5] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -3845,24 +3845,24 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -3905,23 +3905,23 @@ define void @v_shuffle_v4f32_v4f32__7_7_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -3966,24 +3966,24 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -4028,23 +4028,23 @@ define void @v_shuffle_v4f32_v4f32__7_7_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -4089,23 +4089,23 @@ define void @v_shuffle_v4f32_v4f32__7_7_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -4149,22 +4149,22 @@ define void @v_shuffle_v4f32_v4f32__7_7_4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -4207,22 +4207,22 @@ define void @v_shuffle_v4f32_v4f32__7_7_5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -4265,23 +4265,23 @@ define void @v_shuffle_v4f32_v4f32__7_7_6_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -4316,18 +4316,18 @@ define void @v_shuffle_v4f32_v4f32__u_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__u_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__u_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4361,18 +4361,18 @@ define void @v_shuffle_v4f32_v4f32__0_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__0_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4407,18 +4407,18 @@ define void @v_shuffle_v4f32_v4f32__1_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__1_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4454,19 +4454,19 @@ define void @v_shuffle_v4f32_v4f32__2_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__2_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__2_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4501,18 +4501,18 @@ define void @v_shuffle_v4f32_v4f32__3_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__3_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__3_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4546,18 +4546,18 @@ define void @v_shuffle_v4f32_v4f32__4_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__4_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__4_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4598,22 +4598,22 @@ define void @v_shuffle_v4f32_v4f32__5_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__5_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__5_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -4656,22 +4656,22 @@ define void @v_shuffle_v4f32_v4f32__6_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__6_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__6_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -4713,22 +4713,22 @@ define void @v_shuffle_v4f32_v4f32__7_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -4769,21 +4769,21 @@ define void @v_shuffle_v4f32_v4f32__7_u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -4826,21 +4826,21 @@ define void @v_shuffle_v4f32_v4f32__7_0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -4881,21 +4881,21 @@ define void @v_shuffle_v4f32_v4f32__7_1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -4938,22 +4938,22 @@ define void @v_shuffle_v4f32_v4f32__7_3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -4995,21 +4995,21 @@ define void @v_shuffle_v4f32_v4f32__7_4_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[4:5] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[4:5] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -5052,22 +5052,22 @@ define void @v_shuffle_v4f32_v4f32__7_5_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v2 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v2 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -5109,21 +5109,21 @@ define void @v_shuffle_v4f32_v4f32__7_6_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_6_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[6:7] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_6_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[6:7] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -5166,22 +5166,22 @@ define void @v_shuffle_v4f32_v4f32__7_7_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -5224,22 +5224,22 @@ define void @v_shuffle_v4f32_v4f32__7_7_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -5284,23 +5284,23 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v2 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v2 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -5344,22 +5344,22 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -5403,22 +5403,22 @@ define void @v_shuffle_v4f32_v4f32__7_7_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -5462,22 +5462,22 @@ define void @v_shuffle_v4f32_v4f32__7_7_4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v7 -; GFX940-NEXT: v_mov_b32_e32 v8, v4 -; GFX940-NEXT: v_mov_b32_e32 v9, v2 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v7 +; GFX942-NEXT: v_mov_b32_e32 v8, v4 +; GFX942-NEXT: v_mov_b32_e32 v9, v2 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -5520,23 +5520,23 @@ define void @v_shuffle_v4f32_v4f32__7_7_5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -5579,22 +5579,22 @@ define void @v_shuffle_v4f32_v4f32__7_7_6_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v7, v2 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v7, v2 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -5629,18 +5629,18 @@ define void @v_shuffle_v4f32_v4f32__u_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__u_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__u_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -5674,18 +5674,18 @@ define void @v_shuffle_v4f32_v4f32__0_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__0_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -5721,19 +5721,19 @@ define void @v_shuffle_v4f32_v4f32__1_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -5769,19 +5769,19 @@ define void @v_shuffle_v4f32_v4f32__2_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__2_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -5817,19 +5817,19 @@ define void @v_shuffle_v4f32_v4f32__3_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__3_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__3_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -5863,18 +5863,18 @@ define void @v_shuffle_v4f32_v4f32__4_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__4_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__4_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -5916,22 +5916,22 @@ define void @v_shuffle_v4f32_v4f32__5_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__5_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__5_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -5974,22 +5974,22 @@ define void @v_shuffle_v4f32_v4f32__6_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__6_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__6_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -6032,22 +6032,22 @@ define void @v_shuffle_v4f32_v4f32__7_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -6088,21 +6088,21 @@ define void @v_shuffle_v4f32_v4f32__7_u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -6145,21 +6145,21 @@ define void @v_shuffle_v4f32_v4f32__7_0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -6200,21 +6200,21 @@ define void @v_shuffle_v4f32_v4f32__7_1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -6256,22 +6256,22 @@ define void @v_shuffle_v4f32_v4f32__7_2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -6313,21 +6313,21 @@ define void @v_shuffle_v4f32_v4f32__7_4_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[4:5] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[4:5] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -6370,22 +6370,22 @@ define void @v_shuffle_v4f32_v4f32__7_5_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -6427,21 +6427,21 @@ define void @v_shuffle_v4f32_v4f32__7_6_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_6_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[6:7] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_6_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[6:7] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -6484,22 +6484,22 @@ define void @v_shuffle_v4f32_v4f32__7_7_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -6540,22 +6540,22 @@ define void @v_shuffle_v4f32_v4f32__7_7_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -6600,23 +6600,23 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -6661,23 +6661,23 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v1 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v1 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -6718,22 +6718,22 @@ define void @v_shuffle_v4f32_v4f32__7_7_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -6776,23 +6776,23 @@ define void @v_shuffle_v4f32_v4f32__7_7_4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -6835,23 +6835,23 @@ define void @v_shuffle_v4f32_v4f32__7_7_5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -6894,22 +6894,22 @@ define void @v_shuffle_v4f32_v4f32__7_7_6_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -6951,16 +6951,16 @@ define void @v_shuffle_v4f32_v4f32__0_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__0_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__0_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -6992,17 +6992,17 @@ define void @v_shuffle_v4f32_v4f32__1_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__1_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__1_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -7034,17 +7034,17 @@ define void @v_shuffle_v4f32_v4f32__2_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__2_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__2_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -7076,17 +7076,17 @@ define void @v_shuffle_v4f32_v4f32__3_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__3_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__3_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> store <4 x float> %shuf, ptr addrspace(1) %ptr, align 16 @@ -7133,19 +7133,19 @@ define void @v_shuffle_v4f32_v4f32__5_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__5_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__5_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -7183,19 +7183,19 @@ define void @v_shuffle_v4f32_v4f32__6_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__6_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__6_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -7233,19 +7233,19 @@ define void @v_shuffle_v4f32_v4f32__7_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -7282,19 +7282,19 @@ define void @v_shuffle_v4f32_v4f32__7_u_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -7338,23 +7338,23 @@ define void @v_shuffle_v4f32_v4f32__7_0_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v2 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v2 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -7396,23 +7396,23 @@ define void @v_shuffle_v4f32_v4f32__7_1_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -7455,23 +7455,23 @@ define void @v_shuffle_v4f32_v4f32__7_2_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -7516,23 +7516,23 @@ define void @v_shuffle_v4f32_v4f32__7_3_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -7571,20 +7571,20 @@ define void @v_shuffle_v4f32_v4f32__7_5_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -7621,19 +7621,19 @@ define void @v_shuffle_v4f32_v4f32__7_6_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_6_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_6_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -7671,19 +7671,19 @@ define void @v_shuffle_v4f32_v4f32__7_7_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -7719,18 +7719,18 @@ define void @v_shuffle_v4f32_v4f32__7_7_u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -7774,22 +7774,22 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: v_mov_b32_e32 v7, v2 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: v_mov_b32_e32 v7, v2 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -7833,23 +7833,23 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -7891,23 +7891,23 @@ define void @v_shuffle_v4f32_v4f32__7_7_2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -7951,23 +7951,23 @@ define void @v_shuffle_v4f32_v4f32__7_7_3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[4:5] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[4:5] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -8003,18 +8003,18 @@ define void @v_shuffle_v4f32_v4f32__7_7_5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -8053,21 +8053,21 @@ define void @v_shuffle_v4f32_v4f32__7_7_6_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v0 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v0 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -8102,18 +8102,18 @@ define void @v_shuffle_v4f32_v4f32__u_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__u_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__u_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -8154,22 +8154,22 @@ define void @v_shuffle_v4f32_v4f32__0_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__0_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__0_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -8212,23 +8212,23 @@ define void @v_shuffle_v4f32_v4f32__1_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__1_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__1_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -8271,22 +8271,22 @@ define void @v_shuffle_v4f32_v4f32__2_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__2_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__2_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -8329,22 +8329,22 @@ define void @v_shuffle_v4f32_v4f32__3_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__3_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__3_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -8379,18 +8379,18 @@ define void @v_shuffle_v4f32_v4f32__4_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__4_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__4_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -8427,19 +8427,19 @@ define void @v_shuffle_v4f32_v4f32__5_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__5_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__5_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -8476,19 +8476,19 @@ define void @v_shuffle_v4f32_v4f32__6_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__6_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__6_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -8525,19 +8525,19 @@ define void @v_shuffle_v4f32_v4f32__7_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -8574,19 +8574,19 @@ define void @v_shuffle_v4f32_v4f32__7_u_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -8629,22 +8629,22 @@ define void @v_shuffle_v4f32_v4f32__7_0_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -8685,22 +8685,22 @@ define void @v_shuffle_v4f32_v4f32__7_1_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -8744,23 +8744,23 @@ define void @v_shuffle_v4f32_v4f32__7_2_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -8805,23 +8805,23 @@ define void @v_shuffle_v4f32_v4f32__7_3_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -8859,19 +8859,19 @@ define void @v_shuffle_v4f32_v4f32__7_4_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -8908,19 +8908,19 @@ define void @v_shuffle_v4f32_v4f32__7_6_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_6_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_6_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -8957,19 +8957,19 @@ define void @v_shuffle_v4f32_v4f32__7_7_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -9004,18 +9004,18 @@ define void @v_shuffle_v4f32_v4f32__7_7_u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -9058,22 +9058,22 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -9116,22 +9116,22 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -9174,23 +9174,23 @@ define void @v_shuffle_v4f32_v4f32__7_7_2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -9235,23 +9235,23 @@ define void @v_shuffle_v4f32_v4f32__7_7_3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -9288,19 +9288,19 @@ define void @v_shuffle_v4f32_v4f32__7_7_4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -9339,21 +9339,21 @@ define void @v_shuffle_v4f32_v4f32__7_7_6_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -9388,18 +9388,18 @@ define void @v_shuffle_v4f32_v4f32__u_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__u_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__u_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -9441,23 +9441,23 @@ define void @v_shuffle_v4f32_v4f32__0_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__0_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__0_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -9499,22 +9499,22 @@ define void @v_shuffle_v4f32_v4f32__1_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__1_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[4:5] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__1_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[4:5] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -9557,22 +9557,22 @@ define void @v_shuffle_v4f32_v4f32__2_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__2_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, v6 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v6 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__2_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, v6 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v6 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -9614,22 +9614,22 @@ define void @v_shuffle_v4f32_v4f32__3_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__3_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[6:7] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v7, v6 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__3_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[6:7] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v7, v6 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -9664,18 +9664,18 @@ define void @v_shuffle_v4f32_v4f32__4_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__4_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__4_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -9711,18 +9711,18 @@ define void @v_shuffle_v4f32_v4f32__5_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__5_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__5_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -9759,19 +9759,19 @@ define void @v_shuffle_v4f32_v4f32__6_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__6_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__6_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -9807,18 +9807,18 @@ define void @v_shuffle_v4f32_v4f32__7_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -9853,18 +9853,18 @@ define void @v_shuffle_v4f32_v4f32__7_u_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_u_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_u_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -9906,22 +9906,22 @@ define void @v_shuffle_v4f32_v4f32__7_0_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_0_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_0_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -9964,23 +9964,23 @@ define void @v_shuffle_v4f32_v4f32__7_1_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_1_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_1_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -10022,22 +10022,22 @@ define void @v_shuffle_v4f32_v4f32__7_2_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_2_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[4:5], v[6:7], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v7, v6 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_2_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[6:7], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v7, v6 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -10080,22 +10080,22 @@ define void @v_shuffle_v4f32_v4f32__7_3_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_3_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v7, v6 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_3_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v7, v6 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -10131,18 +10131,18 @@ define void @v_shuffle_v4f32_v4f32__7_4_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_4_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_4_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -10177,18 +10177,18 @@ define void @v_shuffle_v4f32_v4f32__7_5_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_5_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_5_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -10225,19 +10225,19 @@ define void @v_shuffle_v4f32_v4f32__7_7_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -10274,19 +10274,19 @@ define void @v_shuffle_v4f32_v4f32__7_7_u_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -10331,23 +10331,23 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, v0 -; GFX940-NEXT: v_mov_b32_e32 v6, v5 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v9, v4 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, v0 +; GFX942-NEXT: v_mov_b32_e32 v6, v5 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v9, v4 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -10391,23 +10391,23 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[4:5] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[4:5] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -10450,23 +10450,23 @@ define void @v_shuffle_v4f32_v4f32__7_7_2_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v3, v6 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v3, v6 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -10510,23 +10510,23 @@ define void @v_shuffle_v4f32_v4f32__7_7_3_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[6:7] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[6:7] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -10565,21 +10565,21 @@ define void @v_shuffle_v4f32_v4f32__7_7_4_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v2 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v2 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -10616,18 +10616,18 @@ define void @v_shuffle_v4f32_v4f32__7_7_5_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -10662,18 +10662,18 @@ define void @v_shuffle_v4f32_v4f32__u_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__u_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__u_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -10716,23 +10716,23 @@ define void @v_shuffle_v4f32_v4f32__0_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__0_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__0_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -10775,23 +10775,23 @@ define void @v_shuffle_v4f32_v4f32__1_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__1_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__1_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -10834,22 +10834,22 @@ define void @v_shuffle_v4f32_v4f32__2_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__2_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v6, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__2_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v6, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -10892,22 +10892,22 @@ define void @v_shuffle_v4f32_v4f32__3_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__3_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__3_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -10942,18 +10942,18 @@ define void @v_shuffle_v4f32_v4f32__4_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__4_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__4_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -10990,19 +10990,19 @@ define void @v_shuffle_v4f32_v4f32__5_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__5_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__5_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -11039,19 +11039,19 @@ define void @v_shuffle_v4f32_v4f32__6_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__6_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__6_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -11086,18 +11086,18 @@ define void @v_shuffle_v4f32_v4f32__7_u_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_u_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_u_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -11139,22 +11139,22 @@ define void @v_shuffle_v4f32_v4f32__7_0_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_0_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_0_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -11197,23 +11197,23 @@ define void @v_shuffle_v4f32_v4f32__7_1_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_1_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_1_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -11255,22 +11255,22 @@ define void @v_shuffle_v4f32_v4f32__7_2_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_2_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[4:5], v[6:7], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v6, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_2_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[6:7], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v6, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -11313,22 +11313,22 @@ define void @v_shuffle_v4f32_v4f32__7_3_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_3_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_3_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -11365,18 +11365,18 @@ define void @v_shuffle_v4f32_v4f32__7_4_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_4_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_4_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -11411,18 +11411,18 @@ define void @v_shuffle_v4f32_v4f32__7_5_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_5_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_5_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -11458,18 +11458,18 @@ define void @v_shuffle_v4f32_v4f32__7_6_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_6_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_6_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -11504,18 +11504,18 @@ define void @v_shuffle_v4f32_v4f32__7_7_u_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -11558,23 +11558,23 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -11617,23 +11617,23 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -11676,23 +11676,23 @@ define void @v_shuffle_v4f32_v4f32__7_7_2_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -11735,22 +11735,22 @@ define void @v_shuffle_v4f32_v4f32__7_7_3_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -11789,21 +11789,21 @@ define void @v_shuffle_v4f32_v4f32__7_7_4_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -11842,21 +11842,21 @@ define void @v_shuffle_v4f32_v4f32__7_7_5_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v1 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v1 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -11891,18 +11891,18 @@ define void @v_shuffle_v4f32_v4f32__7_7_6_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4f32_v4f32__7_7_6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=v"() %vec1 = call <4 x float> asm "; def $0", "=v"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -11947,17 +11947,17 @@ define void @s_shuffle_v4f32_v4f32__0_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x float> %shuf) @@ -11989,17 +11989,17 @@ define void @s_shuffle_v4f32_v4f32__1_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x float> %shuf) @@ -12031,17 +12031,17 @@ define void @s_shuffle_v4f32_v4f32__2_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__2_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__2_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x float> %shuf) @@ -12073,17 +12073,17 @@ define void @s_shuffle_v4f32_v4f32__3_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__3_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__3_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x float> %shuf) @@ -12129,17 +12129,17 @@ define void @s_shuffle_v4f32_v4f32__5_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__5_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__5_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -12172,17 +12172,17 @@ define void @s_shuffle_v4f32_v4f32__6_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__6_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__6_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -12215,17 +12215,17 @@ define void @s_shuffle_v4f32_v4f32__7_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -12266,21 +12266,21 @@ define void @s_shuffle_v4f32_v4f32__7_0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -12319,20 +12319,20 @@ define void @s_shuffle_v4f32_v4f32__7_1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -12373,21 +12373,21 @@ define void @s_shuffle_v4f32_v4f32__7_2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -12428,21 +12428,21 @@ define void @s_shuffle_v4f32_v4f32__7_3_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -12477,18 +12477,18 @@ define void @s_shuffle_v4f32_v4f32__7_4_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -12542,18 +12542,18 @@ define void @s_shuffle_v4f32_v4f32__7_6_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_6_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_6_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -12588,18 +12588,18 @@ define void @s_shuffle_v4f32_v4f32__7_7_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -12642,22 +12642,22 @@ define void @s_shuffle_v4f32_v4f32__7_7_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -12700,22 +12700,22 @@ define void @s_shuffle_v4f32_v4f32__7_7_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -12756,21 +12756,21 @@ define void @s_shuffle_v4f32_v4f32__7_7_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -12813,22 +12813,22 @@ define void @s_shuffle_v4f32_v4f32__7_7_3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -12865,19 +12865,19 @@ define void @s_shuffle_v4f32_v4f32__7_7_4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -12914,19 +12914,19 @@ define void @s_shuffle_v4f32_v4f32__7_7_5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -12983,19 +12983,19 @@ define void @s_shuffle_v4f32_v4f32__7_7_7_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -13040,23 +13040,23 @@ define void @s_shuffle_v4f32_v4f32__7_7_7_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s7 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s7 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -13101,23 +13101,23 @@ define void @s_shuffle_v4f32_v4f32__7_7_7_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s7 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s7 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -13162,23 +13162,23 @@ define void @s_shuffle_v4f32_v4f32__7_7_7_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s7 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s7 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -13221,22 +13221,22 @@ define void @s_shuffle_v4f32_v4f32__7_7_7_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -13275,20 +13275,20 @@ define void @s_shuffle_v4f32_v4f32__7_7_7_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -13327,20 +13327,20 @@ define void @s_shuffle_v4f32_v4f32__7_7_7_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -13379,20 +13379,20 @@ define void @s_shuffle_v4f32_v4f32__7_7_7_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -13450,19 +13450,19 @@ define void @s_shuffle_v4f32_v4f32__u_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x float> %shuf) @@ -13520,20 +13520,20 @@ define void @s_shuffle_v4f32_v4f32__1_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x float> %shuf) @@ -13571,20 +13571,20 @@ define void @s_shuffle_v4f32_v4f32__2_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x float> %shuf) @@ -13622,20 +13622,20 @@ define void @s_shuffle_v4f32_v4f32__3_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x float> %shuf) @@ -13671,19 +13671,19 @@ define void @s_shuffle_v4f32_v4f32__4_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__4_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__4_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x float> %shuf) @@ -13727,23 +13727,23 @@ define void @s_shuffle_v4f32_v4f32__5_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__5_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__5_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -13788,23 +13788,23 @@ define void @s_shuffle_v4f32_v4f32__6_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__6_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__6_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -13849,23 +13849,23 @@ define void @s_shuffle_v4f32_v4f32__7_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -13908,22 +13908,22 @@ define void @s_shuffle_v4f32_v4f32__7_u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -13968,23 +13968,23 @@ define void @s_shuffle_v4f32_v4f32__7_1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -14029,23 +14029,23 @@ define void @s_shuffle_v4f32_v4f32__7_2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -14090,23 +14090,23 @@ define void @s_shuffle_v4f32_v4f32__7_3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -14151,23 +14151,23 @@ define void @s_shuffle_v4f32_v4f32__7_4_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -14210,22 +14210,22 @@ define void @s_shuffle_v4f32_v4f32__7_5_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -14270,23 +14270,23 @@ define void @s_shuffle_v4f32_v4f32__7_6_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_6_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_6_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -14331,23 +14331,23 @@ define void @s_shuffle_v4f32_v4f32__7_7_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -14390,22 +14390,22 @@ define void @s_shuffle_v4f32_v4f32__7_7_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -14450,23 +14450,23 @@ define void @s_shuffle_v4f32_v4f32__7_7_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -14511,23 +14511,23 @@ define void @s_shuffle_v4f32_v4f32__7_7_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -14572,23 +14572,23 @@ define void @s_shuffle_v4f32_v4f32__7_7_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -14633,23 +14633,23 @@ define void @s_shuffle_v4f32_v4f32__7_7_4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -14694,23 +14694,23 @@ define void @s_shuffle_v4f32_v4f32__7_7_5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -14753,22 +14753,22 @@ define void @s_shuffle_v4f32_v4f32__7_7_6_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -14928,22 +14928,22 @@ define void @s_shuffle_v4f32_v4f32__5_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__5_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__5_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -14986,22 +14986,22 @@ define void @s_shuffle_v4f32_v4f32__6_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__6_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__6_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -15044,22 +15044,22 @@ define void @s_shuffle_v4f32_v4f32__7_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -15102,22 +15102,22 @@ define void @s_shuffle_v4f32_v4f32__7_u_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -15162,23 +15162,23 @@ define void @s_shuffle_v4f32_v4f32__7_0_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -15223,23 +15223,23 @@ define void @s_shuffle_v4f32_v4f32__7_2_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -15284,23 +15284,23 @@ define void @s_shuffle_v4f32_v4f32__7_3_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -15345,23 +15345,23 @@ define void @s_shuffle_v4f32_v4f32__7_4_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -15404,22 +15404,22 @@ define void @s_shuffle_v4f32_v4f32__7_5_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -15464,23 +15464,23 @@ define void @s_shuffle_v4f32_v4f32__7_6_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_6_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_6_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -15525,23 +15525,23 @@ define void @s_shuffle_v4f32_v4f32__7_7_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -15584,22 +15584,22 @@ define void @s_shuffle_v4f32_v4f32__7_7_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -15644,23 +15644,23 @@ define void @s_shuffle_v4f32_v4f32__7_7_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -15705,23 +15705,23 @@ define void @s_shuffle_v4f32_v4f32__7_7_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -15766,23 +15766,23 @@ define void @s_shuffle_v4f32_v4f32__7_7_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -15827,23 +15827,23 @@ define void @s_shuffle_v4f32_v4f32__7_7_4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -15888,23 +15888,23 @@ define void @s_shuffle_v4f32_v4f32__7_7_5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -15947,22 +15947,22 @@ define void @s_shuffle_v4f32_v4f32__7_7_6_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -16122,22 +16122,22 @@ define void @s_shuffle_v4f32_v4f32__5_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__5_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__5_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -16180,22 +16180,22 @@ define void @s_shuffle_v4f32_v4f32__6_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__6_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__6_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -16238,22 +16238,22 @@ define void @s_shuffle_v4f32_v4f32__7_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -16294,21 +16294,21 @@ define void @s_shuffle_v4f32_v4f32__7_u_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -16353,23 +16353,23 @@ define void @s_shuffle_v4f32_v4f32__7_0_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -16410,21 +16410,21 @@ define void @s_shuffle_v4f32_v4f32__7_1_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -16467,22 +16467,22 @@ define void @s_shuffle_v4f32_v4f32__7_3_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -16525,22 +16525,22 @@ define void @s_shuffle_v4f32_v4f32__7_4_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -16583,22 +16583,22 @@ define void @s_shuffle_v4f32_v4f32__7_5_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -16641,22 +16641,22 @@ define void @s_shuffle_v4f32_v4f32__7_6_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_6_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_6_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -16699,22 +16699,22 @@ define void @s_shuffle_v4f32_v4f32__7_7_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -16757,22 +16757,22 @@ define void @s_shuffle_v4f32_v4f32__7_7_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -16817,23 +16817,23 @@ define void @s_shuffle_v4f32_v4f32__7_7_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -16878,23 +16878,23 @@ define void @s_shuffle_v4f32_v4f32__7_7_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -16939,23 +16939,23 @@ define void @s_shuffle_v4f32_v4f32__7_7_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -17000,23 +17000,23 @@ define void @s_shuffle_v4f32_v4f32__7_7_4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -17061,23 +17061,23 @@ define void @s_shuffle_v4f32_v4f32__7_7_5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -17120,22 +17120,22 @@ define void @s_shuffle_v4f32_v4f32__7_7_6_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -17295,22 +17295,22 @@ define void @s_shuffle_v4f32_v4f32__5_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__5_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__5_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -17353,22 +17353,22 @@ define void @s_shuffle_v4f32_v4f32__6_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__6_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__6_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -17411,22 +17411,22 @@ define void @s_shuffle_v4f32_v4f32__7_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -17467,21 +17467,21 @@ define void @s_shuffle_v4f32_v4f32__7_u_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -17526,23 +17526,23 @@ define void @s_shuffle_v4f32_v4f32__7_0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -17583,21 +17583,21 @@ define void @s_shuffle_v4f32_v4f32__7_1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -17640,22 +17640,22 @@ define void @s_shuffle_v4f32_v4f32__7_2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -17698,22 +17698,22 @@ define void @s_shuffle_v4f32_v4f32__7_4_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -17756,22 +17756,22 @@ define void @s_shuffle_v4f32_v4f32__7_5_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -17814,22 +17814,22 @@ define void @s_shuffle_v4f32_v4f32__7_6_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_6_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_6_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -17872,22 +17872,22 @@ define void @s_shuffle_v4f32_v4f32__7_7_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -17928,21 +17928,21 @@ define void @s_shuffle_v4f32_v4f32__7_7_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -17987,23 +17987,23 @@ define void @s_shuffle_v4f32_v4f32__7_7_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -18048,23 +18048,23 @@ define void @s_shuffle_v4f32_v4f32__7_7_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -18105,21 +18105,21 @@ define void @s_shuffle_v4f32_v4f32__7_7_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -18162,22 +18162,22 @@ define void @s_shuffle_v4f32_v4f32__7_7_4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -18220,22 +18220,22 @@ define void @s_shuffle_v4f32_v4f32__7_7_5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -18278,22 +18278,22 @@ define void @s_shuffle_v4f32_v4f32__7_7_6_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -18338,17 +18338,17 @@ define void @s_shuffle_v4f32_v4f32__0_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__0_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__0_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x float> %shuf) @@ -18380,17 +18380,17 @@ define void @s_shuffle_v4f32_v4f32__1_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__1_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__1_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x float> %shuf) @@ -18422,17 +18422,17 @@ define void @s_shuffle_v4f32_v4f32__2_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__2_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__2_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x float> %shuf) @@ -18464,17 +18464,17 @@ define void @s_shuffle_v4f32_v4f32__3_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__3_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__3_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x float> %shuf) @@ -18526,20 +18526,20 @@ define void @s_shuffle_v4f32_v4f32__5_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__5_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__5_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -18578,20 +18578,20 @@ define void @s_shuffle_v4f32_v4f32__6_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__6_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__6_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -18630,20 +18630,20 @@ define void @s_shuffle_v4f32_v4f32__7_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -18680,19 +18680,19 @@ define void @s_shuffle_v4f32_v4f32__7_u_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -18737,23 +18737,23 @@ define void @s_shuffle_v4f32_v4f32__7_0_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -18796,22 +18796,22 @@ define void @s_shuffle_v4f32_v4f32__7_1_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -18856,23 +18856,23 @@ define void @s_shuffle_v4f32_v4f32__7_2_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -18917,23 +18917,23 @@ define void @s_shuffle_v4f32_v4f32__7_3_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -18972,20 +18972,20 @@ define void @s_shuffle_v4f32_v4f32__7_5_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -19024,20 +19024,20 @@ define void @s_shuffle_v4f32_v4f32__7_6_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_6_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_6_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -19076,20 +19076,20 @@ define void @s_shuffle_v4f32_v4f32__7_7_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -19126,19 +19126,19 @@ define void @s_shuffle_v4f32_v4f32__7_7_u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -19183,23 +19183,23 @@ define void @s_shuffle_v4f32_v4f32__7_7_0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -19244,23 +19244,23 @@ define void @s_shuffle_v4f32_v4f32__7_7_1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -19303,22 +19303,22 @@ define void @s_shuffle_v4f32_v4f32__7_7_2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -19363,23 +19363,23 @@ define void @s_shuffle_v4f32_v4f32__7_7_3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -19418,20 +19418,20 @@ define void @s_shuffle_v4f32_v4f32__7_7_5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -19470,20 +19470,20 @@ define void @s_shuffle_v4f32_v4f32__7_7_6_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -19546,22 +19546,22 @@ define void @s_shuffle_v4f32_v4f32__0_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__0_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__0_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -19604,22 +19604,22 @@ define void @s_shuffle_v4f32_v4f32__1_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__1_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__1_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -19662,22 +19662,22 @@ define void @s_shuffle_v4f32_v4f32__2_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__2_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__2_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -19720,22 +19720,22 @@ define void @s_shuffle_v4f32_v4f32__3_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__3_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__3_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -19855,19 +19855,19 @@ define void @s_shuffle_v4f32_v4f32__7_u_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -19912,23 +19912,23 @@ define void @s_shuffle_v4f32_v4f32__7_0_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -19971,22 +19971,22 @@ define void @s_shuffle_v4f32_v4f32__7_1_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -20031,23 +20031,23 @@ define void @s_shuffle_v4f32_v4f32__7_2_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -20092,23 +20092,23 @@ define void @s_shuffle_v4f32_v4f32__7_3_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -20147,20 +20147,20 @@ define void @s_shuffle_v4f32_v4f32__7_4_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -20199,20 +20199,20 @@ define void @s_shuffle_v4f32_v4f32__7_6_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_6_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_6_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -20251,20 +20251,20 @@ define void @s_shuffle_v4f32_v4f32__7_7_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -20301,19 +20301,19 @@ define void @s_shuffle_v4f32_v4f32__7_7_u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -20358,23 +20358,23 @@ define void @s_shuffle_v4f32_v4f32__7_7_0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -20419,23 +20419,23 @@ define void @s_shuffle_v4f32_v4f32__7_7_1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -20478,22 +20478,22 @@ define void @s_shuffle_v4f32_v4f32__7_7_2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -20538,23 +20538,23 @@ define void @s_shuffle_v4f32_v4f32__7_7_3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -20593,20 +20593,20 @@ define void @s_shuffle_v4f32_v4f32__7_7_4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -20645,20 +20645,20 @@ define void @s_shuffle_v4f32_v4f32__7_7_6_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -20721,22 +20721,22 @@ define void @s_shuffle_v4f32_v4f32__0_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__0_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__0_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -20779,22 +20779,22 @@ define void @s_shuffle_v4f32_v4f32__1_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__1_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__1_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -20837,22 +20837,22 @@ define void @s_shuffle_v4f32_v4f32__2_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__2_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__2_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -20895,22 +20895,22 @@ define void @s_shuffle_v4f32_v4f32__3_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__3_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__3_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -21056,22 +21056,22 @@ define void @s_shuffle_v4f32_v4f32__7_0_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_0_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_0_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -21114,22 +21114,22 @@ define void @s_shuffle_v4f32_v4f32__7_1_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_1_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_1_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -21172,22 +21172,22 @@ define void @s_shuffle_v4f32_v4f32__7_2_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_2_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_2_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -21230,22 +21230,22 @@ define void @s_shuffle_v4f32_v4f32__7_3_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_3_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_3_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -21284,20 +21284,20 @@ define void @s_shuffle_v4f32_v4f32__7_4_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_4_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_4_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -21375,19 +21375,19 @@ define void @s_shuffle_v4f32_v4f32__7_7_u_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -21432,23 +21432,23 @@ define void @s_shuffle_v4f32_v4f32__7_7_0_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -21493,23 +21493,23 @@ define void @s_shuffle_v4f32_v4f32__7_7_1_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -21552,22 +21552,22 @@ define void @s_shuffle_v4f32_v4f32__7_7_2_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -21612,23 +21612,23 @@ define void @s_shuffle_v4f32_v4f32__7_7_3_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -21667,20 +21667,20 @@ define void @s_shuffle_v4f32_v4f32__7_7_4_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -21719,20 +21719,20 @@ define void @s_shuffle_v4f32_v4f32__7_7_5_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -21795,22 +21795,22 @@ define void @s_shuffle_v4f32_v4f32__0_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__0_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__0_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -21853,22 +21853,22 @@ define void @s_shuffle_v4f32_v4f32__1_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__1_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__1_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -21911,22 +21911,22 @@ define void @s_shuffle_v4f32_v4f32__2_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__2_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__2_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -21969,22 +21969,22 @@ define void @s_shuffle_v4f32_v4f32__3_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__3_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__3_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -22109,22 +22109,22 @@ define void @s_shuffle_v4f32_v4f32__7_0_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_0_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_0_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -22167,22 +22167,22 @@ define void @s_shuffle_v4f32_v4f32__7_1_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_1_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_1_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -22225,22 +22225,22 @@ define void @s_shuffle_v4f32_v4f32__7_2_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_2_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_2_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -22283,22 +22283,22 @@ define void @s_shuffle_v4f32_v4f32__7_3_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_3_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_3_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -22337,20 +22337,20 @@ define void @s_shuffle_v4f32_v4f32__7_4_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_4_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_4_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -22454,22 +22454,22 @@ define void @s_shuffle_v4f32_v4f32__7_7_0_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -22512,22 +22512,22 @@ define void @s_shuffle_v4f32_v4f32__7_7_1_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -22570,22 +22570,22 @@ define void @s_shuffle_v4f32_v4f32__7_7_2_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -22628,22 +22628,22 @@ define void @s_shuffle_v4f32_v4f32__7_7_3_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -22682,20 +22682,20 @@ define void @s_shuffle_v4f32_v4f32__7_7_4_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> @@ -22734,20 +22734,20 @@ define void @s_shuffle_v4f32_v4f32__7_7_5_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4f32_v4f32__7_7_5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4f32_v4f32__7_7_5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x float> asm "; def $0", "=s"() %vec1 = call <4 x float> asm "; def $0", "=s"() %shuf = shufflevector <4 x float> %vec0, <4 x float> %vec1, <4 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i16.v2i16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i16.v2i16.ll index d03477085a299..c985c06900ff8 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i16.v2i16.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i16.v2i16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v4i16_v2i16__u_u_u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v4i16_v2i16__0_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -79,17 +79,17 @@ define void @v_shuffle_v4i16_v2i16__1_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -132,17 +132,17 @@ define void @v_shuffle_v4i16_v2i16__3_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -181,21 +181,21 @@ define void @v_shuffle_v4i16_v2i16__3_0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v2, 16 -; GFX940-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v2, 16 +; GFX942-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -236,22 +236,22 @@ define void @v_shuffle_v4i16_v2i16__3_1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v2 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v2 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -284,17 +284,17 @@ define void @v_shuffle_v4i16_v2i16__3_2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -329,18 +329,18 @@ define void @v_shuffle_v4i16_v2i16__3_3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -381,21 +381,21 @@ define void @v_shuffle_v4i16_v2i16__3_3_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_3_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_3_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -438,22 +438,22 @@ define void @v_shuffle_v4i16_v2i16__3_3_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_3_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_alignbit_b32 v1, s0, v0, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_3_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_alignbit_b32 v1, s0, v0, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -488,19 +488,19 @@ define void @v_shuffle_v4i16_v2i16__3_3_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_3_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_3_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -537,19 +537,19 @@ define void @v_shuffle_v4i16_v2i16__3_3_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_3_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v1, s0, v0, 16 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_3_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v1, s0, v0, 16 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -592,23 +592,23 @@ define void @v_shuffle_v4i16_v2i16__3_3_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_3_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_3_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -651,23 +651,23 @@ define void @v_shuffle_v4i16_v2i16__3_3_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_3_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_3_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v3, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -704,19 +704,19 @@ define void @v_shuffle_v4i16_v2i16__3_3_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_3_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_3_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -753,19 +753,19 @@ define void @v_shuffle_v4i16_v2i16__3_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -802,19 +802,19 @@ define void @v_shuffle_v4i16_v2i16__u_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -850,19 +850,19 @@ define void @v_shuffle_v4i16_v2i16__0_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__0_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <4 x i32> zeroinitializer store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -898,19 +898,19 @@ define void @v_shuffle_v4i16_v2i16__1_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -946,19 +946,19 @@ define void @v_shuffle_v4i16_v2i16__2_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1000,22 +1000,22 @@ define void @v_shuffle_v4i16_v2i16__3_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -1058,22 +1058,22 @@ define void @v_shuffle_v4i16_v2i16__3_u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -1118,23 +1118,23 @@ define void @v_shuffle_v4i16_v2i16__3_1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -1177,22 +1177,22 @@ define void @v_shuffle_v4i16_v2i16__3_2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -1237,23 +1237,23 @@ define void @v_shuffle_v4i16_v2i16__3_3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -1296,22 +1296,22 @@ define void @v_shuffle_v4i16_v2i16__3_3_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_3_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_3_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -1354,22 +1354,22 @@ define void @v_shuffle_v4i16_v2i16__3_3_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_3_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_3_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -1414,23 +1414,23 @@ define void @v_shuffle_v4i16_v2i16__3_3_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_3_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_3_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -1465,19 +1465,19 @@ define void @v_shuffle_v4i16_v2i16__u_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__u_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__u_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1511,19 +1511,19 @@ define void @v_shuffle_v4i16_v2i16__0_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__0_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__0_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1559,19 +1559,19 @@ define void @v_shuffle_v4i16_v2i16__1_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__1_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__1_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1605,19 +1605,19 @@ define void @v_shuffle_v4i16_v2i16__2_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__2_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__2_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1659,22 +1659,22 @@ define void @v_shuffle_v4i16_v2i16__3_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -1717,22 +1717,22 @@ define void @v_shuffle_v4i16_v2i16__3_u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -1775,22 +1775,22 @@ define void @v_shuffle_v4i16_v2i16__3_0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -1833,22 +1833,22 @@ define void @v_shuffle_v4i16_v2i16__3_2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -1891,22 +1891,22 @@ define void @v_shuffle_v4i16_v2i16__3_3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -1947,21 +1947,21 @@ define void @v_shuffle_v4i16_v2i16__3_3_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_3_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_3_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -2002,21 +2002,21 @@ define void @v_shuffle_v4i16_v2i16__3_3_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_3_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_3_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -2061,23 +2061,23 @@ define void @v_shuffle_v4i16_v2i16__3_3_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_3_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_3_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v3, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -2119,16 +2119,16 @@ define void @v_shuffle_v4i16_v2i16__0_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__0_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2160,17 +2160,17 @@ define void @v_shuffle_v4i16_v2i16__1_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__1_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -2217,19 +2217,19 @@ define void @v_shuffle_v4i16_v2i16__3_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -2266,19 +2266,19 @@ define void @v_shuffle_v4i16_v2i16__3_u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -2321,23 +2321,23 @@ define void @v_shuffle_v4i16_v2i16__3_0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -2382,23 +2382,23 @@ define void @v_shuffle_v4i16_v2i16__3_1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -2437,20 +2437,20 @@ define void @v_shuffle_v4i16_v2i16__3_3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -2487,19 +2487,19 @@ define void @v_shuffle_v4i16_v2i16__3_3_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_3_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_3_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -2544,23 +2544,23 @@ define void @v_shuffle_v4i16_v2i16__3_3_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_3_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v0, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_3_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v0, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -2603,23 +2603,23 @@ define void @v_shuffle_v4i16_v2i16__3_3_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_3_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v1, v3, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_3_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v1, v3, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -2654,19 +2654,19 @@ define void @v_shuffle_v4i16_v2i16__u_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__u_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__u_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -2711,23 +2711,23 @@ define void @v_shuffle_v4i16_v2i16__0_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__0_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v1 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -2770,22 +2770,22 @@ define void @v_shuffle_v4i16_v2i16__1_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -2820,19 +2820,19 @@ define void @v_shuffle_v4i16_v2i16__2_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__2_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -2869,19 +2869,19 @@ define void @v_shuffle_v4i16_v2i16__3_u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -2924,23 +2924,23 @@ define void @v_shuffle_v4i16_v2i16__3_0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -2983,22 +2983,22 @@ define void @v_shuffle_v4i16_v2i16__3_1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -3035,19 +3035,19 @@ define void @v_shuffle_v4i16_v2i16__3_2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -3082,19 +3082,19 @@ define void @v_shuffle_v4i16_v2i16__3_3_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_3_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_3_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -3139,23 +3139,23 @@ define void @v_shuffle_v4i16_v2i16__3_3_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_3_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v0, v3 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_3_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v0, v3 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -3198,23 +3198,23 @@ define void @v_shuffle_v4i16_v2i16__3_3_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_3_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v3 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v0, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_3_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v3 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v0, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -3249,19 +3249,19 @@ define void @v_shuffle_v4i16_v2i16__3_3_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v2i16__3_3_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v2i16__3_3_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=v"() %vec1 = call <2 x i16> asm "; def $0", "=v"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -3306,17 +3306,17 @@ define void @s_shuffle_v4i16_v2i16__0_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) @@ -3348,17 +3348,17 @@ define void @s_shuffle_v4i16_v2i16__1_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) @@ -3404,17 +3404,17 @@ define void @s_shuffle_v4i16_v2i16__3_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -3455,21 +3455,21 @@ define void @s_shuffle_v4i16_v2i16__3_0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -3508,20 +3508,20 @@ define void @s_shuffle_v4i16_v2i16__3_1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -3556,18 +3556,18 @@ define void @s_shuffle_v4i16_v2i16__3_2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -3600,17 +3600,17 @@ define void @s_shuffle_v4i16_v2i16__3_3_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -3649,20 +3649,20 @@ define void @s_shuffle_v4i16_v2i16__3_3_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_3_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_3_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -3703,21 +3703,21 @@ define void @s_shuffle_v4i16_v2i16__3_3_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_3_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_3_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -3771,18 +3771,18 @@ define void @s_shuffle_v4i16_v2i16__3_3_3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_3_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_3_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -3825,22 +3825,22 @@ define void @s_shuffle_v4i16_v2i16__3_3_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_3_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_3_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -3881,21 +3881,21 @@ define void @s_shuffle_v4i16_v2i16__3_3_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_3_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_3_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -3932,19 +3932,19 @@ define void @s_shuffle_v4i16_v2i16__3_3_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_3_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_3_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -3979,18 +3979,18 @@ define void @s_shuffle_v4i16_v2i16__3_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -4025,18 +4025,18 @@ define void @s_shuffle_v4i16_v2i16__u_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) @@ -4070,18 +4070,18 @@ define void @s_shuffle_v4i16_v2i16__0_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__0_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <4 x i32> zeroinitializer call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) @@ -4117,19 +4117,19 @@ define void @s_shuffle_v4i16_v2i16__1_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) @@ -4163,18 +4163,18 @@ define void @s_shuffle_v4i16_v2i16__2_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) @@ -4216,22 +4216,22 @@ define void @s_shuffle_v4i16_v2i16__3_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -4272,21 +4272,21 @@ define void @s_shuffle_v4i16_v2i16__3_u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -4327,21 +4327,21 @@ define void @s_shuffle_v4i16_v2i16__3_1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -4384,22 +4384,22 @@ define void @s_shuffle_v4i16_v2i16__3_2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -4440,21 +4440,21 @@ define void @s_shuffle_v4i16_v2i16__3_3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -4495,21 +4495,21 @@ define void @s_shuffle_v4i16_v2i16__3_3_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_3_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_lshl_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_3_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_lshl_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -4552,22 +4552,22 @@ define void @s_shuffle_v4i16_v2i16__3_3_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_3_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_3_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -4608,21 +4608,21 @@ define void @s_shuffle_v4i16_v2i16__3_3_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_3_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_3_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -4693,18 +4693,18 @@ define void @s_shuffle_v4i16_v2i16__1_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__1_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__1_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) @@ -4762,21 +4762,21 @@ define void @s_shuffle_v4i16_v2i16__3_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -4817,21 +4817,21 @@ define void @s_shuffle_v4i16_v2i16__3_u_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -4874,22 +4874,22 @@ define void @s_shuffle_v4i16_v2i16__3_0_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -4932,22 +4932,22 @@ define void @s_shuffle_v4i16_v2i16__3_2_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -4988,21 +4988,21 @@ define void @s_shuffle_v4i16_v2i16__3_3_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -5041,20 +5041,20 @@ define void @s_shuffle_v4i16_v2i16__3_3_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_3_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_3_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -5093,20 +5093,20 @@ define void @s_shuffle_v4i16_v2i16__3_3_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_3_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s9 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_3_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s9 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -5147,21 +5147,21 @@ define void @s_shuffle_v4i16_v2i16__3_3_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_3_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_3_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -5206,17 +5206,17 @@ define void @s_shuffle_v4i16_v2i16__0_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__0_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s8 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s8 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) @@ -5248,17 +5248,17 @@ define void @s_shuffle_v4i16_v2i16__1_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__1_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) @@ -5308,19 +5308,19 @@ define void @s_shuffle_v4i16_v2i16__3_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -5355,18 +5355,18 @@ define void @s_shuffle_v4i16_v2i16__3_u_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -5409,22 +5409,22 @@ define void @s_shuffle_v4i16_v2i16__3_0_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -5465,21 +5465,21 @@ define void @s_shuffle_v4i16_v2i16__3_1_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -5514,18 +5514,18 @@ define void @s_shuffle_v4i16_v2i16__3_3_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -5560,18 +5560,18 @@ define void @s_shuffle_v4i16_v2i16__3_3_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_3_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_lshl_b32 s9, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_3_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_lshl_b32 s9, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -5612,21 +5612,21 @@ define void @s_shuffle_v4i16_v2i16__3_3_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_3_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_3_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -5669,22 +5669,22 @@ define void @s_shuffle_v4i16_v2i16__3_3_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_3_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_3_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -5744,21 +5744,21 @@ define void @s_shuffle_v4i16_v2i16__0_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__0_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -5799,21 +5799,21 @@ define void @s_shuffle_v4i16_v2i16__1_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -5867,18 +5867,18 @@ define void @s_shuffle_v4i16_v2i16__3_u_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -5921,22 +5921,22 @@ define void @s_shuffle_v4i16_v2i16__3_0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -5977,21 +5977,21 @@ define void @s_shuffle_v4i16_v2i16__3_1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -6028,19 +6028,19 @@ define void @s_shuffle_v4i16_v2i16__3_2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -6100,21 +6100,21 @@ define void @s_shuffle_v4i16_v2i16__3_3_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_3_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_3_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> @@ -6155,21 +6155,21 @@ define void @s_shuffle_v4i16_v2i16__3_3_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v2i16__3_3_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s0 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s1 -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v2i16__3_3_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s0 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s1 +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i16> asm "; def $0", "=s"() %vec1 = call <2 x i16> asm "; def $0", "=s"() %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <4 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i16.v3i16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i16.v3i16.ll index 45afe29b1cda4..7b3a5a879f44f 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i16.v3i16.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i16.v3i16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v4i16_v3i16__u_u_u_u(ptr addrspace(1) inreg %ptr) { @@ -39,16 +39,16 @@ define void @v_shuffle_v4i16_v3i16__0_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> @@ -81,17 +81,17 @@ define void @v_shuffle_v4i16_v3i16__1_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> @@ -124,17 +124,17 @@ define void @v_shuffle_v4i16_v3i16__2_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__2_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__2_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> @@ -179,17 +179,17 @@ define void @v_shuffle_v4i16_v3i16__4_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__4_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__4_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -224,17 +224,17 @@ define void @v_shuffle_v4i16_v3i16__5_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -277,22 +277,22 @@ define void @v_shuffle_v4i16_v3i16__5_0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -335,22 +335,22 @@ define void @v_shuffle_v4i16_v3i16__5_1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -393,22 +393,22 @@ define void @v_shuffle_v4i16_v3i16__5_2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -445,18 +445,18 @@ define void @v_shuffle_v4i16_v3i16__5_3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -493,18 +493,18 @@ define void @v_shuffle_v4i16_v3i16__5_4_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -541,18 +541,18 @@ define void @v_shuffle_v4i16_v3i16__5_5_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -597,22 +597,22 @@ define void @v_shuffle_v4i16_v3i16__5_5_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v2, v3, v3, s2 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v2, v3, v3, s2 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -657,22 +657,22 @@ define void @v_shuffle_v4i16_v3i16__5_5_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_alignbit_b32 v1, s0, v0, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_alignbit_b32 v1, s0, v0, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -715,22 +715,22 @@ define void @v_shuffle_v4i16_v3i16__5_5_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -769,20 +769,20 @@ define void @v_shuffle_v4i16_v3i16__5_5_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -821,20 +821,20 @@ define void @v_shuffle_v4i16_v3i16__5_5_4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v3, s0, v0, 16 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v3, s0, v0, 16 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -871,18 +871,18 @@ define void @v_shuffle_v4i16_v3i16__5_5_5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -927,23 +927,23 @@ define void @v_shuffle_v4i16_v3i16__5_5_5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v3, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -990,23 +990,23 @@ define void @v_shuffle_v4i16_v3i16__5_5_5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v3, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -1051,23 +1051,23 @@ define void @v_shuffle_v4i16_v3i16__5_5_5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v3, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v3, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -1106,20 +1106,20 @@ define void @v_shuffle_v4i16_v3i16__5_5_5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -1160,20 +1160,20 @@ define void @v_shuffle_v4i16_v3i16__5_5_5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v3, s2, v1, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v3, s2, v1, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -1212,19 +1212,19 @@ define void @v_shuffle_v4i16_v3i16__5_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -1263,19 +1263,19 @@ define void @v_shuffle_v4i16_v3i16__u_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> @@ -1312,19 +1312,19 @@ define void @v_shuffle_v4i16_v3i16__0_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__0_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> zeroinitializer @@ -1361,19 +1361,19 @@ define void @v_shuffle_v4i16_v3i16__1_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> @@ -1410,20 +1410,20 @@ define void @v_shuffle_v4i16_v3i16__2_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> @@ -1460,19 +1460,19 @@ define void @v_shuffle_v4i16_v3i16__3_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> @@ -1515,22 +1515,22 @@ define void @v_shuffle_v4i16_v3i16__4_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__4_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v2, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__4_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v2, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -1575,23 +1575,23 @@ define void @v_shuffle_v4i16_v3i16__5_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v3, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v3, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -1636,22 +1636,22 @@ define void @v_shuffle_v4i16_v3i16__5_u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -1698,23 +1698,23 @@ define void @v_shuffle_v4i16_v3i16__5_1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v2, s2, v3, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v2, s2, v3, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -1759,23 +1759,23 @@ define void @v_shuffle_v4i16_v3i16__5_2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v3, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v3, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -1820,22 +1820,22 @@ define void @v_shuffle_v4i16_v3i16__5_3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v2, v2, v3, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v2, v2, v3, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -1882,24 +1882,24 @@ define void @v_shuffle_v4i16_v3i16__5_4_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_bfi_b32 v2, s2, v3, v2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_bfi_b32 v2, s2, v3, v2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -1944,22 +1944,22 @@ define void @v_shuffle_v4i16_v3i16__5_5_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -2004,22 +2004,22 @@ define void @v_shuffle_v4i16_v3i16__5_5_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v2, v3, v3, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v2, v3, v3, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -2064,22 +2064,22 @@ define void @v_shuffle_v4i16_v3i16__5_5_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_alignbit_b32 v1, v0, v0, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_alignbit_b32 v1, v0, v0, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -2124,22 +2124,22 @@ define void @v_shuffle_v4i16_v3i16__5_5_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -2184,23 +2184,23 @@ define void @v_shuffle_v4i16_v3i16__5_5_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v2, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v2, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -2245,23 +2245,23 @@ define void @v_shuffle_v4i16_v3i16__5_5_4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v2, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v2, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -2298,18 +2298,18 @@ define void @v_shuffle_v4i16_v3i16__u_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__u_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__u_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> @@ -2344,18 +2344,18 @@ define void @v_shuffle_v4i16_v3i16__0_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__0_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__0_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> @@ -2392,19 +2392,19 @@ define void @v_shuffle_v4i16_v3i16__1_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__1_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__1_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> @@ -2443,20 +2443,20 @@ define void @v_shuffle_v4i16_v3i16__2_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__2_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v2, s2, v1, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__2_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v2, s2, v1, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> @@ -2491,18 +2491,18 @@ define void @v_shuffle_v4i16_v3i16__3_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__3_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> @@ -2545,23 +2545,23 @@ define void @v_shuffle_v4i16_v3i16__4_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__4_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v2, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__4_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v2, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -2608,23 +2608,23 @@ define void @v_shuffle_v4i16_v3i16__5_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v2, s2, v3, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v2, s2, v3, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -2669,22 +2669,22 @@ define void @v_shuffle_v4i16_v3i16__5_u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -2731,23 +2731,23 @@ define void @v_shuffle_v4i16_v3i16__5_0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -2794,23 +2794,23 @@ define void @v_shuffle_v4i16_v3i16__5_2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -2857,24 +2857,24 @@ define void @v_shuffle_v4i16_v3i16__5_3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v2, v2, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v2, v2, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -2921,24 +2921,24 @@ define void @v_shuffle_v4i16_v3i16__5_4_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_bfi_b32 v2, s2, v3, v2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_bfi_b32 v2, s2, v3, v2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -2985,24 +2985,24 @@ define void @v_shuffle_v4i16_v3i16__5_5_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -3047,22 +3047,22 @@ define void @v_shuffle_v4i16_v3i16__5_5_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v2, v3, v3, s2 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v2, v3, v3, s2 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -3107,22 +3107,22 @@ define void @v_shuffle_v4i16_v3i16__5_5_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v2, v3, v3, s2 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v2, v3, v3, s2 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -3169,24 +3169,24 @@ define void @v_shuffle_v4i16_v3i16__5_5_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_bfi_b32 v1, s2, v1, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -3233,23 +3233,23 @@ define void @v_shuffle_v4i16_v3i16__5_5_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v2, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v2, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -3296,23 +3296,23 @@ define void @v_shuffle_v4i16_v3i16__5_5_4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v2, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v2, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -3351,20 +3351,20 @@ define void @v_shuffle_v4i16_v3i16__u_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__u_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__u_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> @@ -3401,19 +3401,19 @@ define void @v_shuffle_v4i16_v3i16__0_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__0_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> @@ -3450,20 +3450,20 @@ define void @v_shuffle_v4i16_v3i16__1_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__1_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> @@ -3500,19 +3500,19 @@ define void @v_shuffle_v4i16_v3i16__2_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__2_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__2_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> @@ -3549,20 +3549,20 @@ define void @v_shuffle_v4i16_v3i16__3_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__3_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__3_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> @@ -3605,23 +3605,23 @@ define void @v_shuffle_v4i16_v3i16__4_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__4_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v2, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__4_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v2, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -3666,23 +3666,23 @@ define void @v_shuffle_v4i16_v3i16__5_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -3727,22 +3727,22 @@ define void @v_shuffle_v4i16_v3i16__5_u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -3787,22 +3787,22 @@ define void @v_shuffle_v4i16_v3i16__5_0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -3849,23 +3849,23 @@ define void @v_shuffle_v4i16_v3i16__5_1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -3910,22 +3910,22 @@ define void @v_shuffle_v4i16_v3i16__5_3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_perm_b32 v0, v2, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -3972,23 +3972,23 @@ define void @v_shuffle_v4i16_v3i16__5_4_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -4033,22 +4033,22 @@ define void @v_shuffle_v4i16_v3i16__5_5_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -4093,22 +4093,22 @@ define void @v_shuffle_v4i16_v3i16__5_5_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -4153,22 +4153,22 @@ define void @v_shuffle_v4i16_v3i16__5_5_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v0, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v0, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -4213,22 +4213,22 @@ define void @v_shuffle_v4i16_v3i16__5_5_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_alignbit_b32 v1, v1, v0, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_alignbit_b32 v1, v1, v0, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -4273,23 +4273,23 @@ define void @v_shuffle_v4i16_v3i16__5_5_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v2, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v2, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -4334,23 +4334,23 @@ define void @v_shuffle_v4i16_v3i16__5_5_4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v2, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v2, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -4395,16 +4395,16 @@ define void @v_shuffle_v4i16_v3i16__0_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__0_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> @@ -4437,17 +4437,17 @@ define void @v_shuffle_v4i16_v3i16__1_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> @@ -4480,17 +4480,17 @@ define void @v_shuffle_v4i16_v3i16__2_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__2_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> @@ -4539,19 +4539,19 @@ define void @v_shuffle_v4i16_v3i16__4_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__4_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__4_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -4590,20 +4590,20 @@ define void @v_shuffle_v4i16_v3i16__5_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -4641,20 +4641,20 @@ define void @v_shuffle_v4i16_v3i16__5_u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -4699,23 +4699,23 @@ define void @v_shuffle_v4i16_v3i16__5_0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -4762,23 +4762,23 @@ define void @v_shuffle_v4i16_v3i16__5_1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -4823,23 +4823,23 @@ define void @v_shuffle_v4i16_v3i16__5_2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -4880,20 +4880,20 @@ define void @v_shuffle_v4i16_v3i16__5_4_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v2, s2, v1, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v2, s2, v1, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -4932,20 +4932,20 @@ define void @v_shuffle_v4i16_v3i16__5_5_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -4984,20 +4984,20 @@ define void @v_shuffle_v4i16_v3i16__5_5_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -5042,23 +5042,23 @@ define void @v_shuffle_v4i16_v3i16__5_5_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v0, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v0, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -5103,23 +5103,23 @@ define void @v_shuffle_v4i16_v3i16__5_5_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v2, v0, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v2, v0, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -5164,23 +5164,23 @@ define void @v_shuffle_v4i16_v3i16__5_5_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v1, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v1, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -5219,20 +5219,20 @@ define void @v_shuffle_v4i16_v3i16__5_5_4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v3, v0, v0, 16 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v3, v0, v0, 16 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -5269,18 +5269,18 @@ define void @v_shuffle_v4i16_v3i16__u_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__u_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__u_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -5327,23 +5327,23 @@ define void @v_shuffle_v4i16_v3i16__0_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__0_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__0_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -5388,23 +5388,23 @@ define void @v_shuffle_v4i16_v3i16__1_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__1_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__1_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -5451,23 +5451,23 @@ define void @v_shuffle_v4i16_v3i16__2_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__2_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__2_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -5504,18 +5504,18 @@ define void @v_shuffle_v4i16_v3i16__3_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__3_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__3_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -5554,19 +5554,19 @@ define void @v_shuffle_v4i16_v3i16__4_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__4_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__4_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -5607,20 +5607,20 @@ define void @v_shuffle_v4i16_v3i16__5_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v2, s2, v1, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v2, s2, v1, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -5658,20 +5658,20 @@ define void @v_shuffle_v4i16_v3i16__5_u_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -5718,23 +5718,23 @@ define void @v_shuffle_v4i16_v3i16__5_0_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -5781,23 +5781,23 @@ define void @v_shuffle_v4i16_v3i16__5_1_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -5844,23 +5844,23 @@ define void @v_shuffle_v4i16_v3i16__5_2_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -5901,20 +5901,20 @@ define void @v_shuffle_v4i16_v3i16__5_3_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v1, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v1, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -5955,20 +5955,20 @@ define void @v_shuffle_v4i16_v3i16__5_5_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -6007,20 +6007,20 @@ define void @v_shuffle_v4i16_v3i16__5_5_u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -6067,23 +6067,23 @@ define void @v_shuffle_v4i16_v3i16__5_5_0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v0, v2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v0, v2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -6130,23 +6130,23 @@ define void @v_shuffle_v4i16_v3i16__5_5_1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v0, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v0, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -6193,23 +6193,23 @@ define void @v_shuffle_v4i16_v3i16__5_5_2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v1, v2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -6248,20 +6248,20 @@ define void @v_shuffle_v4i16_v3i16__5_5_3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -6300,20 +6300,20 @@ define void @v_shuffle_v4i16_v3i16__u_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__u_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__u_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -6358,23 +6358,23 @@ define void @v_shuffle_v4i16_v3i16__0_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__0_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__0_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -6419,23 +6419,23 @@ define void @v_shuffle_v4i16_v3i16__1_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__1_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__1_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -6480,23 +6480,23 @@ define void @v_shuffle_v4i16_v3i16__2_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__2_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__2_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -6535,19 +6535,19 @@ define void @v_shuffle_v4i16_v3i16__3_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__3_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__3_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -6586,20 +6586,20 @@ define void @v_shuffle_v4i16_v3i16__4_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__4_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__4_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -6637,20 +6637,20 @@ define void @v_shuffle_v4i16_v3i16__5_u_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -6695,23 +6695,23 @@ define void @v_shuffle_v4i16_v3i16__5_0_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -6758,23 +6758,23 @@ define void @v_shuffle_v4i16_v3i16__5_1_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -6819,23 +6819,23 @@ define void @v_shuffle_v4i16_v3i16__5_2_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -6874,19 +6874,19 @@ define void @v_shuffle_v4i16_v3i16__5_3_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -6927,20 +6927,20 @@ define void @v_shuffle_v4i16_v3i16__5_4_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -6979,19 +6979,19 @@ define void @v_shuffle_v4i16_v3i16__5_5_u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -7036,23 +7036,23 @@ define void @v_shuffle_v4i16_v3i16__5_5_0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v0, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v0, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -7097,23 +7097,23 @@ define void @v_shuffle_v4i16_v3i16__5_5_1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v3, v0, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v3, v0, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -7158,23 +7158,23 @@ define void @v_shuffle_v4i16_v3i16__5_5_2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v1, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v1, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -7213,20 +7213,20 @@ define void @v_shuffle_v4i16_v3i16__5_5_3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v0, s2 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v0, s2 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -7265,20 +7265,20 @@ define void @v_shuffle_v4i16_v3i16__5_5_4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v3, v1, v0, 16 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v3i16__5_5_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v3, v1, v0, 16 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -7326,17 +7326,17 @@ define void @s_shuffle_v4i16_v3i16__0_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> @@ -7369,17 +7369,17 @@ define void @s_shuffle_v4i16_v3i16__1_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> @@ -7412,17 +7412,17 @@ define void @s_shuffle_v4i16_v3i16__2_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__2_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__2_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> @@ -7470,17 +7470,17 @@ define void @s_shuffle_v4i16_v3i16__4_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__4_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__4_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -7515,17 +7515,17 @@ define void @s_shuffle_v4i16_v3i16__5_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -7566,20 +7566,20 @@ define void @s_shuffle_v4i16_v3i16__5_0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -7620,20 +7620,20 @@ define void @s_shuffle_v4i16_v3i16__5_1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -7674,20 +7674,20 @@ define void @s_shuffle_v4i16_v3i16__5_2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -7722,17 +7722,17 @@ define void @s_shuffle_v4i16_v3i16__5_3_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -7767,17 +7767,17 @@ define void @s_shuffle_v4i16_v3i16__5_4_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -7812,17 +7812,17 @@ define void @s_shuffle_v4i16_v3i16__5_5_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -7865,21 +7865,21 @@ define void @s_shuffle_v4i16_v3i16__5_5_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -7922,21 +7922,21 @@ define void @s_shuffle_v4i16_v3i16__5_5_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -7977,20 +7977,20 @@ define void @s_shuffle_v4i16_v3i16__5_5_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -8027,18 +8027,18 @@ define void @s_shuffle_v4i16_v3i16__5_5_3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -8075,18 +8075,18 @@ define void @s_shuffle_v4i16_v3i16__5_5_4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -8150,21 +8150,21 @@ define void @s_shuffle_v4i16_v3i16__5_5_5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -8207,21 +8207,21 @@ define void @s_shuffle_v4i16_v3i16__5_5_5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s9, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s9, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -8264,21 +8264,21 @@ define void @s_shuffle_v4i16_v3i16__5_5_5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -8315,18 +8315,18 @@ define void @s_shuffle_v4i16_v3i16__5_5_5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -8363,18 +8363,18 @@ define void @s_shuffle_v4i16_v3i16__5_5_5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -8411,18 +8411,18 @@ define void @s_shuffle_v4i16_v3i16__5_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -8459,18 +8459,18 @@ define void @s_shuffle_v4i16_v3i16__u_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> @@ -8505,18 +8505,18 @@ define void @s_shuffle_v4i16_v3i16__0_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__0_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> zeroinitializer @@ -8553,19 +8553,19 @@ define void @s_shuffle_v4i16_v3i16__1_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> @@ -8600,18 +8600,18 @@ define void @s_shuffle_v4i16_v3i16__2_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> @@ -8646,18 +8646,18 @@ define void @s_shuffle_v4i16_v3i16__3_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> @@ -8700,22 +8700,22 @@ define void @s_shuffle_v4i16_v3i16__4_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__4_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__4_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -8758,21 +8758,21 @@ define void @s_shuffle_v4i16_v3i16__5_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -8815,21 +8815,21 @@ define void @s_shuffle_v4i16_v3i16__5_u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -8872,21 +8872,21 @@ define void @s_shuffle_v4i16_v3i16__5_1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -8929,21 +8929,21 @@ define void @s_shuffle_v4i16_v3i16__5_2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -8986,21 +8986,21 @@ define void @s_shuffle_v4i16_v3i16__5_3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -9043,21 +9043,21 @@ define void @s_shuffle_v4i16_v3i16__5_4_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s3, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -9100,21 +9100,21 @@ define void @s_shuffle_v4i16_v3i16__5_5_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -9157,21 +9157,21 @@ define void @s_shuffle_v4i16_v3i16__5_5_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -9216,22 +9216,22 @@ define void @s_shuffle_v4i16_v3i16__5_5_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -9274,21 +9274,21 @@ define void @s_shuffle_v4i16_v3i16__5_5_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -9331,21 +9331,21 @@ define void @s_shuffle_v4i16_v3i16__5_5_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -9390,22 +9390,22 @@ define void @s_shuffle_v4i16_v3i16__5_5_4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -9480,18 +9480,18 @@ define void @s_shuffle_v4i16_v3i16__1_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__1_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__1_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> @@ -9526,18 +9526,18 @@ define void @s_shuffle_v4i16_v3i16__2_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__2_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__2_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> @@ -9597,21 +9597,21 @@ define void @s_shuffle_v4i16_v3i16__4_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__4_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__4_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -9654,21 +9654,21 @@ define void @s_shuffle_v4i16_v3i16__5_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -9711,21 +9711,21 @@ define void @s_shuffle_v4i16_v3i16__5_u_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -9768,21 +9768,21 @@ define void @s_shuffle_v4i16_v3i16__5_0_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -9825,21 +9825,21 @@ define void @s_shuffle_v4i16_v3i16__5_2_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -9882,21 +9882,21 @@ define void @s_shuffle_v4i16_v3i16__5_3_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s2 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s2 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -9939,21 +9939,21 @@ define void @s_shuffle_v4i16_v3i16__5_4_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s2 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s3, s2 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -9996,21 +9996,21 @@ define void @s_shuffle_v4i16_v3i16__5_5_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -10053,21 +10053,21 @@ define void @s_shuffle_v4i16_v3i16__5_5_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -10110,21 +10110,21 @@ define void @s_shuffle_v4i16_v3i16__5_5_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -10167,21 +10167,21 @@ define void @s_shuffle_v4i16_v3i16__5_5_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -10224,21 +10224,21 @@ define void @s_shuffle_v4i16_v3i16__5_5_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s9, s2, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s9, s2, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -10281,21 +10281,21 @@ define void @s_shuffle_v4i16_v3i16__5_5_4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s2, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s2, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -10332,18 +10332,18 @@ define void @s_shuffle_v4i16_v3i16__u_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__u_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__u_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> @@ -10378,18 +10378,18 @@ define void @s_shuffle_v4i16_v3i16__0_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__0_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> @@ -10426,19 +10426,19 @@ define void @s_shuffle_v4i16_v3i16__1_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__1_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> @@ -10473,18 +10473,18 @@ define void @s_shuffle_v4i16_v3i16__2_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__2_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__2_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> @@ -10519,18 +10519,18 @@ define void @s_shuffle_v4i16_v3i16__3_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__3_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__3_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> @@ -10573,22 +10573,22 @@ define void @s_shuffle_v4i16_v3i16__4_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__4_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__4_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -10631,21 +10631,21 @@ define void @s_shuffle_v4i16_v3i16__5_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -10688,21 +10688,21 @@ define void @s_shuffle_v4i16_v3i16__5_u_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -10745,21 +10745,21 @@ define void @s_shuffle_v4i16_v3i16__5_0_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -10802,21 +10802,21 @@ define void @s_shuffle_v4i16_v3i16__5_1_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -10859,21 +10859,21 @@ define void @s_shuffle_v4i16_v3i16__5_3_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -10916,21 +10916,21 @@ define void @s_shuffle_v4i16_v3i16__5_4_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s3, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -10973,21 +10973,21 @@ define void @s_shuffle_v4i16_v3i16__5_5_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -11030,21 +11030,21 @@ define void @s_shuffle_v4i16_v3i16__5_5_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s9, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s9, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -11087,21 +11087,21 @@ define void @s_shuffle_v4i16_v3i16__5_5_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -11146,22 +11146,22 @@ define void @s_shuffle_v4i16_v3i16__5_5_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -11204,21 +11204,21 @@ define void @s_shuffle_v4i16_v3i16__5_5_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -11263,22 +11263,22 @@ define void @s_shuffle_v4i16_v3i16__5_5_4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -11326,17 +11326,17 @@ define void @s_shuffle_v4i16_v3i16__0_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__0_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> @@ -11369,17 +11369,17 @@ define void @s_shuffle_v4i16_v3i16__1_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> @@ -11412,17 +11412,17 @@ define void @s_shuffle_v4i16_v3i16__2_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__2_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> @@ -11474,19 +11474,19 @@ define void @s_shuffle_v4i16_v3i16__4_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__4_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__4_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -11523,18 +11523,18 @@ define void @s_shuffle_v4i16_v3i16__5_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -11571,18 +11571,18 @@ define void @s_shuffle_v4i16_v3i16__5_u_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -11625,21 +11625,21 @@ define void @s_shuffle_v4i16_v3i16__5_0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -11682,21 +11682,21 @@ define void @s_shuffle_v4i16_v3i16__5_1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -11739,21 +11739,21 @@ define void @s_shuffle_v4i16_v3i16__5_2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -11790,18 +11790,18 @@ define void @s_shuffle_v4i16_v3i16__5_4_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -11838,18 +11838,18 @@ define void @s_shuffle_v4i16_v3i16__5_5_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -11886,18 +11886,18 @@ define void @s_shuffle_v4i16_v3i16__5_5_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -11940,21 +11940,21 @@ define void @s_shuffle_v4i16_v3i16__5_5_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -11999,22 +11999,22 @@ define void @s_shuffle_v4i16_v3i16__5_5_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -12057,21 +12057,21 @@ define void @s_shuffle_v4i16_v3i16__5_5_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -12110,19 +12110,19 @@ define void @s_shuffle_v4i16_v3i16__5_5_4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -12186,21 +12186,21 @@ define void @s_shuffle_v4i16_v3i16__0_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__0_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s2 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__0_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s0, s2 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -12243,21 +12243,21 @@ define void @s_shuffle_v4i16_v3i16__1_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__1_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s2 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__1_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s2 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -12300,21 +12300,21 @@ define void @s_shuffle_v4i16_v3i16__2_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__2_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s2 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__2_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s1, s2 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -12372,18 +12372,18 @@ define void @s_shuffle_v4i16_v3i16__4_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__4_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__4_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -12420,18 +12420,18 @@ define void @s_shuffle_v4i16_v3i16__5_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -12468,18 +12468,18 @@ define void @s_shuffle_v4i16_v3i16__5_u_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -12522,21 +12522,21 @@ define void @s_shuffle_v4i16_v3i16__5_0_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -12579,21 +12579,21 @@ define void @s_shuffle_v4i16_v3i16__5_1_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -12636,21 +12636,21 @@ define void @s_shuffle_v4i16_v3i16__5_2_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -12687,18 +12687,18 @@ define void @s_shuffle_v4i16_v3i16__5_3_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -12735,18 +12735,18 @@ define void @s_shuffle_v4i16_v3i16__5_5_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -12783,18 +12783,18 @@ define void @s_shuffle_v4i16_v3i16__5_5_u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -12837,21 +12837,21 @@ define void @s_shuffle_v4i16_v3i16__5_5_0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s9, s0, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s9, s0, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -12894,21 +12894,21 @@ define void @s_shuffle_v4i16_v3i16__5_5_1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -12951,21 +12951,21 @@ define void @s_shuffle_v4i16_v3i16__5_5_2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s9, s1, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s9, s1, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -13002,18 +13002,18 @@ define void @s_shuffle_v4i16_v3i16__5_5_3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -13050,18 +13050,18 @@ define void @s_shuffle_v4i16_v3i16__u_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__u_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__u_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -13104,21 +13104,21 @@ define void @s_shuffle_v4i16_v3i16__0_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__0_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__0_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -13163,22 +13163,22 @@ define void @s_shuffle_v4i16_v3i16__1_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__1_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__1_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -13221,21 +13221,21 @@ define void @s_shuffle_v4i16_v3i16__2_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__2_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__2_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -13272,18 +13272,18 @@ define void @s_shuffle_v4i16_v3i16__3_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__3_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__3_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -13322,19 +13322,19 @@ define void @s_shuffle_v4i16_v3i16__4_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__4_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__4_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -13371,18 +13371,18 @@ define void @s_shuffle_v4i16_v3i16__5_u_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -13425,21 +13425,21 @@ define void @s_shuffle_v4i16_v3i16__5_0_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -13482,21 +13482,21 @@ define void @s_shuffle_v4i16_v3i16__5_1_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -13539,21 +13539,21 @@ define void @s_shuffle_v4i16_v3i16__5_2_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -13590,18 +13590,18 @@ define void @s_shuffle_v4i16_v3i16__5_3_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -13638,18 +13638,18 @@ define void @s_shuffle_v4i16_v3i16__5_4_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -13686,18 +13686,18 @@ define void @s_shuffle_v4i16_v3i16__5_5_u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s9, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s9, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -13740,21 +13740,21 @@ define void @s_shuffle_v4i16_v3i16__5_5_0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -13799,22 +13799,22 @@ define void @s_shuffle_v4i16_v3i16__5_5_1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -13857,21 +13857,21 @@ define void @s_shuffle_v4i16_v3i16__5_5_2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -13908,18 +13908,18 @@ define void @s_shuffle_v4i16_v3i16__5_5_3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> @@ -13958,19 +13958,19 @@ define void @s_shuffle_v4i16_v3i16__5_5_4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v3i16__5_5_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i16.v4i16.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i16.v4i16.ll index 46d301c77ab5c..2a371b7c7d2d3 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i16.v4i16.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i16.v4i16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v4i16_v4i16__u_u_u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v4i16_v4i16__0_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -79,17 +79,17 @@ define void @v_shuffle_v4i16_v4i16__1_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -121,17 +121,17 @@ define void @v_shuffle_v4i16_v4i16__2_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__2_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__2_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -163,17 +163,17 @@ define void @v_shuffle_v4i16_v4i16__3_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__3_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__3_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -216,17 +216,17 @@ define void @v_shuffle_v4i16_v4i16__5_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__5_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__5_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -259,17 +259,17 @@ define void @v_shuffle_v4i16_v4i16__6_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__6_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__6_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -302,17 +302,17 @@ define void @v_shuffle_v4i16_v4i16__7_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -351,21 +351,21 @@ define void @v_shuffle_v4i16_v4i16__7_0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -406,22 +406,22 @@ define void @v_shuffle_v4i16_v4i16__7_1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -460,21 +460,21 @@ define void @v_shuffle_v4i16_v4i16__7_2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -515,22 +515,22 @@ define void @v_shuffle_v4i16_v4i16__7_3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -563,17 +563,17 @@ define void @v_shuffle_v4i16_v4i16__7_4_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -608,18 +608,18 @@ define void @v_shuffle_v4i16_v4i16__7_5_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -652,17 +652,17 @@ define void @v_shuffle_v4i16_v4i16__7_6_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_6_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_6_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v1, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -697,18 +697,18 @@ define void @v_shuffle_v4i16_v4i16__7_7_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -751,22 +751,22 @@ define void @v_shuffle_v4i16_v4i16__7_7_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v2, v3, v3, s2 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v2, v3, v3, s2 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -809,22 +809,22 @@ define void @v_shuffle_v4i16_v4i16__7_7_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_alignbit_b32 v1, s0, v0, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_alignbit_b32 v1, s0, v0, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -865,22 +865,22 @@ define void @v_shuffle_v4i16_v4i16__7_7_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -923,22 +923,22 @@ define void @v_shuffle_v4i16_v4i16__7_7_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_alignbit_b32 v1, s0, v1, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -975,20 +975,20 @@ define void @v_shuffle_v4i16_v4i16__7_7_4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -1025,20 +1025,20 @@ define void @v_shuffle_v4i16_v4i16__7_7_5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v3, s0, v0, 16 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v3, s0, v0, 16 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -1073,18 +1073,18 @@ define void @v_shuffle_v4i16_v4i16__7_7_6_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -1121,20 +1121,20 @@ define void @v_shuffle_v4i16_v4i16__7_7_7_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v3, s0, v1, 16 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v3, s0, v1, 16 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -1177,23 +1177,23 @@ define void @v_shuffle_v4i16_v4i16__7_7_7_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v3, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v3, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -1236,23 +1236,23 @@ define void @v_shuffle_v4i16_v4i16__7_7_7_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v3, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -1295,23 +1295,23 @@ define void @v_shuffle_v4i16_v4i16__7_7_7_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -1354,23 +1354,23 @@ define void @v_shuffle_v4i16_v4i16__7_7_7_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v3, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v3, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -1407,20 +1407,20 @@ define void @v_shuffle_v4i16_v4i16__7_7_7_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v3, v0, v1, 16 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v3, v0, v1, 16 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -1457,20 +1457,20 @@ define void @v_shuffle_v4i16_v4i16__7_7_7_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -1507,19 +1507,19 @@ define void @v_shuffle_v4i16_v4i16__7_7_7_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -1556,19 +1556,19 @@ define void @v_shuffle_v4i16_v4i16__7_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -1605,19 +1605,19 @@ define void @v_shuffle_v4i16_v4i16__u_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1653,19 +1653,19 @@ define void @v_shuffle_v4i16_v4i16__0_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__0_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> zeroinitializer store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1701,19 +1701,19 @@ define void @v_shuffle_v4i16_v4i16__1_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1749,20 +1749,20 @@ define void @v_shuffle_v4i16_v4i16__2_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1798,20 +1798,20 @@ define void @v_shuffle_v4i16_v4i16__3_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v0, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v0, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1847,19 +1847,19 @@ define void @v_shuffle_v4i16_v4i16__4_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__4_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__4_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -1901,22 +1901,22 @@ define void @v_shuffle_v4i16_v4i16__5_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__5_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v2, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__5_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v2, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -1959,23 +1959,23 @@ define void @v_shuffle_v4i16_v4i16__6_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__6_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v3, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__6_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v3, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -2018,22 +2018,22 @@ define void @v_shuffle_v4i16_v4i16__7_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -2076,22 +2076,22 @@ define void @v_shuffle_v4i16_v4i16__7_u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -2136,23 +2136,23 @@ define void @v_shuffle_v4i16_v4i16__7_1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -2195,23 +2195,23 @@ define void @v_shuffle_v4i16_v4i16__7_2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v5, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v4, v1, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v5, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v4, v1, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -2256,23 +2256,23 @@ define void @v_shuffle_v4i16_v4i16__7_3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -2315,22 +2315,22 @@ define void @v_shuffle_v4i16_v4i16__7_4_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -2375,24 +2375,24 @@ define void @v_shuffle_v4i16_v4i16__7_5_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v2, v2, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v2, v2, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -2435,22 +2435,22 @@ define void @v_shuffle_v4i16_v4i16__7_6_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_6_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_6_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -2495,24 +2495,24 @@ define void @v_shuffle_v4i16_v4i16__7_7_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -2555,22 +2555,22 @@ define void @v_shuffle_v4i16_v4i16__7_7_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v2, v3, v3, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v2, v3, v3, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -2613,22 +2613,22 @@ define void @v_shuffle_v4i16_v4i16__7_7_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_alignbit_b32 v1, v0, v0, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_alignbit_b32 v1, v0, v0, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -2673,24 +2673,24 @@ define void @v_shuffle_v4i16_v4i16__7_7_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v0, v1, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -2733,22 +2733,22 @@ define void @v_shuffle_v4i16_v4i16__7_7_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_alignbit_b32 v1, v0, v1, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_alignbit_b32 v1, v0, v1, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -2793,23 +2793,23 @@ define void @v_shuffle_v4i16_v4i16__7_7_4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v2, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v2, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -2852,23 +2852,23 @@ define void @v_shuffle_v4i16_v4i16__7_7_5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v0, v2, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v0, v2, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -2913,23 +2913,23 @@ define void @v_shuffle_v4i16_v4i16__7_7_6_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -2964,18 +2964,18 @@ define void @v_shuffle_v4i16_v4i16__u_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__u_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__u_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3009,18 +3009,18 @@ define void @v_shuffle_v4i16_v4i16__0_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__0_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__0_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3056,19 +3056,19 @@ define void @v_shuffle_v4i16_v4i16__1_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__1_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__1_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3106,20 +3106,20 @@ define void @v_shuffle_v4i16_v4i16__2_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__2_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v2, s2, v1, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__2_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v2, s2, v1, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3155,20 +3155,20 @@ define void @v_shuffle_v4i16_v4i16__3_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__3_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3202,18 +3202,18 @@ define void @v_shuffle_v4i16_v4i16__4_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__4_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__4_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -3255,23 +3255,23 @@ define void @v_shuffle_v4i16_v4i16__5_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__5_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v2, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__5_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v2, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -3316,23 +3316,23 @@ define void @v_shuffle_v4i16_v4i16__6_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__6_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v2, s2, v3, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__6_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v2, s2, v3, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -3375,23 +3375,23 @@ define void @v_shuffle_v4i16_v4i16__7_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v3, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v3, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -3434,22 +3434,22 @@ define void @v_shuffle_v4i16_v4i16__7_u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -3492,22 +3492,22 @@ define void @v_shuffle_v4i16_v4i16__7_0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -3550,23 +3550,23 @@ define void @v_shuffle_v4i16_v4i16__7_2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v5, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v4, v1, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v5, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v4, v1, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -3609,23 +3609,23 @@ define void @v_shuffle_v4i16_v4i16__7_3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v3, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v3, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -3668,22 +3668,22 @@ define void @v_shuffle_v4i16_v4i16__7_4_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -3726,22 +3726,22 @@ define void @v_shuffle_v4i16_v4i16__7_5_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v2, v2, v3, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v2, v2, v3, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -3784,22 +3784,22 @@ define void @v_shuffle_v4i16_v4i16__7_6_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_6_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_6_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -3842,22 +3842,22 @@ define void @v_shuffle_v4i16_v4i16__7_7_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -3900,22 +3900,22 @@ define void @v_shuffle_v4i16_v4i16__7_7_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v2, v3, v3, s2 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v2, v3, v3, s2 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -3958,22 +3958,22 @@ define void @v_shuffle_v4i16_v4i16__7_7_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v2, v3, v3, s2 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v2, v3, v3, s2 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -4018,24 +4018,24 @@ define void @v_shuffle_v4i16_v4i16__7_7_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_bfi_b32 v1, s2, v1, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -4078,22 +4078,22 @@ define void @v_shuffle_v4i16_v4i16__7_7_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -4138,23 +4138,23 @@ define void @v_shuffle_v4i16_v4i16__7_7_4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v2, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v2, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -4197,23 +4197,23 @@ define void @v_shuffle_v4i16_v4i16__7_7_5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v2, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v2, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -4258,23 +4258,23 @@ define void @v_shuffle_v4i16_v4i16__7_7_6_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v3, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -4311,20 +4311,20 @@ define void @v_shuffle_v4i16_v4i16__u_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__u_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__u_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4360,19 +4360,19 @@ define void @v_shuffle_v4i16_v4i16__0_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__0_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4408,20 +4408,20 @@ define void @v_shuffle_v4i16_v4i16__1_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__1_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4457,19 +4457,19 @@ define void @v_shuffle_v4i16_v4i16__2_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__2_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__2_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4505,20 +4505,20 @@ define void @v_shuffle_v4i16_v4i16__3_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__3_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__3_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4554,20 +4554,20 @@ define void @v_shuffle_v4i16_v4i16__4_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__4_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__4_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -4609,23 +4609,23 @@ define void @v_shuffle_v4i16_v4i16__5_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__5_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v2, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__5_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v2, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -4668,23 +4668,23 @@ define void @v_shuffle_v4i16_v4i16__6_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__6_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__6_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -4727,23 +4727,23 @@ define void @v_shuffle_v4i16_v4i16__7_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v5, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v4, v1, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v5, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v4, v1, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -4786,22 +4786,22 @@ define void @v_shuffle_v4i16_v4i16__7_u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -4844,22 +4844,22 @@ define void @v_shuffle_v4i16_v4i16__7_0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -4904,23 +4904,23 @@ define void @v_shuffle_v4i16_v4i16__7_1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -4965,23 +4965,23 @@ define void @v_shuffle_v4i16_v4i16__7_3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -5024,22 +5024,22 @@ define void @v_shuffle_v4i16_v4i16__7_4_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -5084,23 +5084,23 @@ define void @v_shuffle_v4i16_v4i16__7_5_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -5143,22 +5143,22 @@ define void @v_shuffle_v4i16_v4i16__7_6_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_6_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_6_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -5203,23 +5203,23 @@ define void @v_shuffle_v4i16_v4i16__7_7_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -5262,22 +5262,22 @@ define void @v_shuffle_v4i16_v4i16__7_7_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -5322,24 +5322,24 @@ define void @v_shuffle_v4i16_v4i16__7_7_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v1, v0, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v1, v0, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -5382,22 +5382,22 @@ define void @v_shuffle_v4i16_v4i16__7_7_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v2, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v3, v1, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v2, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v3, v1, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -5440,22 +5440,22 @@ define void @v_shuffle_v4i16_v4i16__7_7_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_alignbit_b32 v1, v1, v1, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -5500,23 +5500,23 @@ define void @v_shuffle_v4i16_v4i16__7_7_4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v2, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v2, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -5559,23 +5559,23 @@ define void @v_shuffle_v4i16_v4i16__7_7_5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v1, v1, v2, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v1, v1, v2, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -5620,23 +5620,23 @@ define void @v_shuffle_v4i16_v4i16__7_7_6_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -5672,20 +5672,20 @@ define void @v_shuffle_v4i16_v4i16__u_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__u_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__u_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5723,20 +5723,20 @@ define void @v_shuffle_v4i16_v4i16__0_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__0_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v1 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5772,19 +5772,19 @@ define void @v_shuffle_v4i16_v4i16__1_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5819,20 +5819,20 @@ define void @v_shuffle_v4i16_v4i16__2_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__2_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5868,19 +5868,19 @@ define void @v_shuffle_v4i16_v4i16__3_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__3_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__3_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5915,20 +5915,20 @@ define void @v_shuffle_v4i16_v4i16__4_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__4_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__4_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -5970,23 +5970,23 @@ define void @v_shuffle_v4i16_v4i16__5_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__5_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v2, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__5_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v2, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -6031,23 +6031,23 @@ define void @v_shuffle_v4i16_v4i16__6_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__6_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v1 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__6_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v3, v1 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -6090,23 +6090,23 @@ define void @v_shuffle_v4i16_v4i16__7_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -6149,22 +6149,22 @@ define void @v_shuffle_v4i16_v4i16__7_u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -6207,22 +6207,22 @@ define void @v_shuffle_v4i16_v4i16__7_0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -6265,22 +6265,22 @@ define void @v_shuffle_v4i16_v4i16__7_1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -6323,23 +6323,23 @@ define void @v_shuffle_v4i16_v4i16__7_2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v5, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v4, v1, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v5, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v4, v1, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -6382,22 +6382,22 @@ define void @v_shuffle_v4i16_v4i16__7_4_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v2, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v2, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -6440,22 +6440,22 @@ define void @v_shuffle_v4i16_v4i16__7_5_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_perm_b32 v0, v2, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -6498,22 +6498,22 @@ define void @v_shuffle_v4i16_v4i16__7_6_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_6_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_6_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -6556,22 +6556,22 @@ define void @v_shuffle_v4i16_v4i16__7_7_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -6612,22 +6612,22 @@ define void @v_shuffle_v4i16_v4i16__7_7_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -6672,24 +6672,24 @@ define void @v_shuffle_v4i16_v4i16__7_7_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_bfi_b32 v1, s2, v0, v1 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_bfi_b32 v1, s2, v0, v1 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -6732,22 +6732,22 @@ define void @v_shuffle_v4i16_v4i16__7_7_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v1, v1, v0, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v1, v1, v0, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -6788,22 +6788,22 @@ define void @v_shuffle_v4i16_v4i16__7_7_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -6848,23 +6848,23 @@ define void @v_shuffle_v4i16_v4i16__7_7_4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v2, v1 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v2, v1 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -6907,23 +6907,23 @@ define void @v_shuffle_v4i16_v4i16__7_7_5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v2, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v2, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -6968,23 +6968,23 @@ define void @v_shuffle_v4i16_v4i16__7_7_6_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v1 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v3, v1 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -7026,16 +7026,16 @@ define void @v_shuffle_v4i16_v4i16__0_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__0_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__0_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7067,17 +7067,17 @@ define void @v_shuffle_v4i16_v4i16__1_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__1_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__1_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7109,17 +7109,17 @@ define void @v_shuffle_v4i16_v4i16__2_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__2_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__2_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7151,17 +7151,17 @@ define void @v_shuffle_v4i16_v4i16__3_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__3_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__3_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 @@ -7208,19 +7208,19 @@ define void @v_shuffle_v4i16_v4i16__5_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__5_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__5_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -7257,20 +7257,20 @@ define void @v_shuffle_v4i16_v4i16__6_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__6_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__6_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -7307,20 +7307,20 @@ define void @v_shuffle_v4i16_v4i16__7_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v0, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v0, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -7357,20 +7357,20 @@ define void @v_shuffle_v4i16_v4i16__7_u_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v2, s0, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v2, s0, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -7413,23 +7413,23 @@ define void @v_shuffle_v4i16_v4i16__7_0_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -7474,23 +7474,23 @@ define void @v_shuffle_v4i16_v4i16__7_1_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -7533,23 +7533,23 @@ define void @v_shuffle_v4i16_v4i16__7_2_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v5, v2, v2, s2 -; GFX940-NEXT: v_alignbit_b32 v4, v1, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v5, v2, v2, s2 +; GFX942-NEXT: v_alignbit_b32 v4, v1, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -7594,23 +7594,23 @@ define void @v_shuffle_v4i16_v4i16__7_3_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -7649,20 +7649,20 @@ define void @v_shuffle_v4i16_v4i16__7_5_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v1, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v1, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -7699,20 +7699,20 @@ define void @v_shuffle_v4i16_v4i16__7_6_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_6_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_6_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -7751,20 +7751,20 @@ define void @v_shuffle_v4i16_v4i16__7_7_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -7801,20 +7801,20 @@ define void @v_shuffle_v4i16_v4i16__7_7_u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -7859,23 +7859,23 @@ define void @v_shuffle_v4i16_v4i16__7_7_0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v0, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v0, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -7918,23 +7918,23 @@ define void @v_shuffle_v4i16_v4i16__7_7_1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v2, v0, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v2, v0, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -7979,23 +7979,23 @@ define void @v_shuffle_v4i16_v4i16__7_7_2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v1, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v1, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -8038,23 +8038,23 @@ define void @v_shuffle_v4i16_v4i16__7_7_3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v1, v2, v1, 16 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v1, v2, v1, 16 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -8091,20 +8091,20 @@ define void @v_shuffle_v4i16_v4i16__7_7_5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v3, v0, v0, 16 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v3, v0, v0, 16 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -8143,20 +8143,20 @@ define void @v_shuffle_v4i16_v4i16__7_7_6_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v1, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v1, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -8191,18 +8191,18 @@ define void @v_shuffle_v4i16_v4i16__u_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__u_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__u_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -8247,23 +8247,23 @@ define void @v_shuffle_v4i16_v4i16__0_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__0_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__0_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -8306,23 +8306,23 @@ define void @v_shuffle_v4i16_v4i16__1_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__1_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__1_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -8367,23 +8367,23 @@ define void @v_shuffle_v4i16_v4i16__2_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__2_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__2_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -8426,23 +8426,23 @@ define void @v_shuffle_v4i16_v4i16__3_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__3_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v2, v1, s2 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__3_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v2, v1, s2 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -8477,18 +8477,18 @@ define void @v_shuffle_v4i16_v4i16__4_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__4_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__4_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -8525,19 +8525,19 @@ define void @v_shuffle_v4i16_v4i16__5_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__5_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__5_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -8576,20 +8576,20 @@ define void @v_shuffle_v4i16_v4i16__6_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__6_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v2, s2, v1, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__6_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v2, s2, v1, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -8626,20 +8626,20 @@ define void @v_shuffle_v4i16_v4i16__7_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -8676,20 +8676,20 @@ define void @v_shuffle_v4i16_v4i16__7_u_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v2, s0, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v2, s0, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -8732,23 +8732,23 @@ define void @v_shuffle_v4i16_v4i16__7_0_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -8791,23 +8791,23 @@ define void @v_shuffle_v4i16_v4i16__7_1_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -8850,23 +8850,23 @@ define void @v_shuffle_v4i16_v4i16__7_2_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v5, v2, v2, s2 -; GFX940-NEXT: v_alignbit_b32 v4, v1, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v5, v2, v2, s2 +; GFX942-NEXT: v_alignbit_b32 v4, v1, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -8909,23 +8909,23 @@ define void @v_shuffle_v4i16_v4i16__7_3_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -8962,20 +8962,20 @@ define void @v_shuffle_v4i16_v4i16__7_4_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v0, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v0, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -9012,20 +9012,20 @@ define void @v_shuffle_v4i16_v4i16__7_6_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_6_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_6_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -9062,20 +9062,20 @@ define void @v_shuffle_v4i16_v4i16__7_7_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v0, v0, s2 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -9112,20 +9112,20 @@ define void @v_shuffle_v4i16_v4i16__7_7_u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -9170,23 +9170,23 @@ define void @v_shuffle_v4i16_v4i16__7_7_0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v0, v2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v0, v2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -9229,23 +9229,23 @@ define void @v_shuffle_v4i16_v4i16__7_7_1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v0, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v0, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -9290,23 +9290,23 @@ define void @v_shuffle_v4i16_v4i16__7_7_2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v1, v2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -9349,23 +9349,23 @@ define void @v_shuffle_v4i16_v4i16__7_7_3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v2, v1, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v2, v1, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -9402,20 +9402,20 @@ define void @v_shuffle_v4i16_v4i16__7_7_4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -9454,20 +9454,20 @@ define void @v_shuffle_v4i16_v4i16__7_7_6_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v3, s2, v1, v0 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v3, s2, v1, v0 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -9504,20 +9504,20 @@ define void @v_shuffle_v4i16_v4i16__u_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__u_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__u_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -9560,23 +9560,23 @@ define void @v_shuffle_v4i16_v4i16__0_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__0_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__0_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -9619,23 +9619,23 @@ define void @v_shuffle_v4i16_v4i16__1_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__1_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__1_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v3, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -9678,23 +9678,23 @@ define void @v_shuffle_v4i16_v4i16__2_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__2_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__2_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -9737,23 +9737,23 @@ define void @v_shuffle_v4i16_v4i16__3_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__3_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v5, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v4, v3, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__3_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v5, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v4, v3, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -9790,19 +9790,19 @@ define void @v_shuffle_v4i16_v4i16__4_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__4_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__4_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -9839,20 +9839,20 @@ define void @v_shuffle_v4i16_v4i16__5_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__5_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__5_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -9889,19 +9889,19 @@ define void @v_shuffle_v4i16_v4i16__6_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__6_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__6_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -9938,20 +9938,20 @@ define void @v_shuffle_v4i16_v4i16__7_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -9988,19 +9988,19 @@ define void @v_shuffle_v4i16_v4i16__7_u_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_u_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_u_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, s0, v1, 16 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -10043,23 +10043,23 @@ define void @v_shuffle_v4i16_v4i16__7_0_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_0_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_0_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -10104,23 +10104,23 @@ define void @v_shuffle_v4i16_v4i16__7_1_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_1_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_1_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -10163,23 +10163,23 @@ define void @v_shuffle_v4i16_v4i16__7_2_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_2_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v5, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v4, v1, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_2_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v5, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v4, v1, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -10224,23 +10224,23 @@ define void @v_shuffle_v4i16_v4i16__7_3_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_3_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_3_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -10277,19 +10277,19 @@ define void @v_shuffle_v4i16_v4i16__7_4_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_4_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_4_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v1, 16 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -10328,20 +10328,20 @@ define void @v_shuffle_v4i16_v4i16__7_5_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_5_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_5_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -10380,20 +10380,20 @@ define void @v_shuffle_v4i16_v4i16__7_7_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -10430,19 +10430,19 @@ define void @v_shuffle_v4i16_v4i16__7_7_u_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -10487,23 +10487,23 @@ define void @v_shuffle_v4i16_v4i16__7_7_0_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v0, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v0, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -10546,22 +10546,22 @@ define void @v_shuffle_v4i16_v4i16__7_7_1_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_perm_b32 v2, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v3, v3, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_perm_b32 v2, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v3, v3, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -10606,23 +10606,23 @@ define void @v_shuffle_v4i16_v4i16__7_7_2_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v1, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v1, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -10665,23 +10665,23 @@ define void @v_shuffle_v4i16_v4i16__7_7_3_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v1, v3, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v1, v3, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -10720,20 +10720,20 @@ define void @v_shuffle_v4i16_v4i16__7_7_4_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x5040100 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v0, s2 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x5040100 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v0, s2 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -10770,20 +10770,20 @@ define void @v_shuffle_v4i16_v4i16__7_7_5_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v3, v1, v0, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v3, v1, v0, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -10819,20 +10819,20 @@ define void @v_shuffle_v4i16_v4i16__u_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__u_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__u_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -10877,23 +10877,23 @@ define void @v_shuffle_v4i16_v4i16__0_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__0_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v3 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__0_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v3 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -10936,23 +10936,23 @@ define void @v_shuffle_v4i16_v4i16__1_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__1_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__1_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -10997,23 +10997,23 @@ define void @v_shuffle_v4i16_v4i16__2_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__2_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v3 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__2_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v0, s2, v1, v3 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -11056,23 +11056,23 @@ define void @v_shuffle_v4i16_v4i16__3_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__3_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__3_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v3, v1, s2 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -11111,20 +11111,20 @@ define void @v_shuffle_v4i16_v4i16__4_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__4_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__4_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: v_bfi_b32 v0, s2, v0, v1 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -11161,19 +11161,19 @@ define void @v_shuffle_v4i16_v4i16__5_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__5_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__5_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v0, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -11209,20 +11209,20 @@ define void @v_shuffle_v4i16_v4i16__6_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__6_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__6_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -11259,20 +11259,20 @@ define void @v_shuffle_v4i16_v4i16__7_u_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_u_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v2, s0, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_u_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v2, s0, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -11315,23 +11315,23 @@ define void @v_shuffle_v4i16_v4i16__7_0_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_0_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_0_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v0, v0, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -11374,23 +11374,23 @@ define void @v_shuffle_v4i16_v4i16__7_1_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_1_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_1_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v3, s2 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -11433,23 +11433,23 @@ define void @v_shuffle_v4i16_v4i16__7_2_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_2_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v5, v3, v3, s2 -; GFX940-NEXT: v_alignbit_b32 v4, v1, v3, 16 -; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_2_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v5, v3, v3, s2 +; GFX942-NEXT: v_alignbit_b32 v4, v1, v3, 16 +; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -11492,23 +11492,23 @@ define void @v_shuffle_v4i16_v4i16__7_3_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_3_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 -; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_3_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v3, s2 +; GFX942-NEXT: v_perm_b32 v1, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -11545,20 +11545,20 @@ define void @v_shuffle_v4i16_v4i16__7_4_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_4_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v0, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_4_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v0, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -11595,19 +11595,19 @@ define void @v_shuffle_v4i16_v4i16__7_5_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_5_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_5_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v0, v1, s2 +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -11644,20 +11644,20 @@ define void @v_shuffle_v4i16_v4i16__7_6_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_6_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 -; GFX940-NEXT: v_alignbit_b32 v2, v1, v1, 16 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_6_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v1, s2 +; GFX942-NEXT: v_alignbit_b32 v2, v1, v1, 16 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -11692,18 +11692,18 @@ define void @v_shuffle_v4i16_v4i16__7_7_u_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -11748,23 +11748,23 @@ define void @v_shuffle_v4i16_v4i16__7_7_0_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v0, v3 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v0, v3 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -11807,23 +11807,23 @@ define void @v_shuffle_v4i16_v4i16__7_7_1_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v0, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v0, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -11868,23 +11868,23 @@ define void @v_shuffle_v4i16_v4i16__7_7_2_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v3 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v1, s2, v1, v3 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -11927,23 +11927,23 @@ define void @v_shuffle_v4i16_v4i16__7_7_3_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v1, v3, v1, s2 -; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v1, v3, v1, s2 +; GFX942-NEXT: v_perm_b32 v0, v3, v3, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -11982,20 +11982,20 @@ define void @v_shuffle_v4i16_v4i16__7_7_4_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0xffff -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_bfi_b32 v3, s2, v0, v1 -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0xffff +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_bfi_b32 v3, s2, v0, v1 +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -12032,20 +12032,20 @@ define void @v_shuffle_v4i16_v4i16__7_7_5_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_perm_b32 v3, v1, v0, s2 -; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_perm_b32 v3, v1, v0, s2 +; GFX942-NEXT: v_perm_b32 v2, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -12080,18 +12080,18 @@ define void @v_shuffle_v4i16_v4i16__7_7_6_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s2, 0x7060302 -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 -; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i16_v4i16__7_7_6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s2, 0x7060302 +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_perm_b32 v0, v1, v1, s2 +; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=v"() %vec1 = call <4 x i16> asm "; def $0", "=v"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -12136,17 +12136,17 @@ define void @s_shuffle_v4i16_v4i16__0_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) @@ -12178,17 +12178,17 @@ define void @s_shuffle_v4i16_v4i16__1_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) @@ -12220,17 +12220,17 @@ define void @s_shuffle_v4i16_v4i16__2_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__2_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__2_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) @@ -12262,17 +12262,17 @@ define void @s_shuffle_v4i16_v4i16__3_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__3_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__3_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) @@ -12318,17 +12318,17 @@ define void @s_shuffle_v4i16_v4i16__5_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__5_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__5_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -12361,17 +12361,17 @@ define void @s_shuffle_v4i16_v4i16__6_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__6_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__6_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -12404,17 +12404,17 @@ define void @s_shuffle_v4i16_v4i16__7_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -12455,21 +12455,21 @@ define void @s_shuffle_v4i16_v4i16__7_0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -12508,20 +12508,20 @@ define void @s_shuffle_v4i16_v4i16__7_1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -12562,21 +12562,21 @@ define void @s_shuffle_v4i16_v4i16__7_2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -12615,20 +12615,20 @@ define void @s_shuffle_v4i16_v4i16__7_3_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -12663,18 +12663,18 @@ define void @s_shuffle_v4i16_v4i16__7_4_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -12707,17 +12707,17 @@ define void @s_shuffle_v4i16_v4i16__7_5_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -12752,18 +12752,18 @@ define void @s_shuffle_v4i16_v4i16__7_6_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_6_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_6_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -12796,17 +12796,17 @@ define void @s_shuffle_v4i16_v4i16__7_7_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -12847,21 +12847,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -12902,21 +12902,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -12955,20 +12955,20 @@ define void @s_shuffle_v4i16_v4i16__7_7_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -13009,21 +13009,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -13058,18 +13058,18 @@ define void @s_shuffle_v4i16_v4i16__7_7_4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -13104,18 +13104,18 @@ define void @s_shuffle_v4i16_v4i16__7_7_5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -13169,18 +13169,18 @@ define void @s_shuffle_v4i16_v4i16__7_7_7_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s9, s1, 16 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s9, s1, 16 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -13223,22 +13223,22 @@ define void @s_shuffle_v4i16_v4i16__7_7_7_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -13279,21 +13279,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_7_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s3, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s3, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -13336,22 +13336,22 @@ define void @s_shuffle_v4i16_v4i16__7_7_7_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -13392,21 +13392,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_7_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s3, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s3, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -13443,19 +13443,19 @@ define void @s_shuffle_v4i16_v4i16__7_7_7_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -13490,18 +13490,18 @@ define void @s_shuffle_v4i16_v4i16__7_7_7_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -13538,19 +13538,19 @@ define void @s_shuffle_v4i16_v4i16__7_7_7_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -13585,18 +13585,18 @@ define void @s_shuffle_v4i16_v4i16__7_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -13631,18 +13631,18 @@ define void @s_shuffle_v4i16_v4i16__u_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) @@ -13676,18 +13676,18 @@ define void @s_shuffle_v4i16_v4i16__0_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__0_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> zeroinitializer call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) @@ -13723,19 +13723,19 @@ define void @s_shuffle_v4i16_v4i16__1_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) @@ -13769,18 +13769,18 @@ define void @s_shuffle_v4i16_v4i16__2_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) @@ -13816,19 +13816,19 @@ define void @s_shuffle_v4i16_v4i16__3_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) @@ -13862,18 +13862,18 @@ define void @s_shuffle_v4i16_v4i16__4_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__4_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_lshl_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__4_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_lshl_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) @@ -13915,22 +13915,22 @@ define void @s_shuffle_v4i16_v4i16__5_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__5_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__5_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -13971,21 +13971,21 @@ define void @s_shuffle_v4i16_v4i16__6_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__6_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__6_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -14028,22 +14028,22 @@ define void @s_shuffle_v4i16_v4i16__7_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -14084,21 +14084,21 @@ define void @s_shuffle_v4i16_v4i16__7_u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -14139,21 +14139,21 @@ define void @s_shuffle_v4i16_v4i16__7_1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -14196,22 +14196,22 @@ define void @s_shuffle_v4i16_v4i16__7_2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -14252,21 +14252,21 @@ define void @s_shuffle_v4i16_v4i16__7_3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -14309,22 +14309,22 @@ define void @s_shuffle_v4i16_v4i16__7_4_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -14365,21 +14365,21 @@ define void @s_shuffle_v4i16_v4i16__7_5_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -14422,22 +14422,22 @@ define void @s_shuffle_v4i16_v4i16__7_6_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_6_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_6_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -14478,21 +14478,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -14533,21 +14533,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -14590,22 +14590,22 @@ define void @s_shuffle_v4i16_v4i16__7_7_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -14646,21 +14646,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -14703,22 +14703,22 @@ define void @s_shuffle_v4i16_v4i16__7_7_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -14759,21 +14759,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -14816,22 +14816,22 @@ define void @s_shuffle_v4i16_v4i16__7_7_5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -14872,21 +14872,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_6_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -14957,18 +14957,18 @@ define void @s_shuffle_v4i16_v4i16__1_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__1_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__1_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) @@ -15002,18 +15002,18 @@ define void @s_shuffle_v4i16_v4i16__2_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__2_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__2_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) @@ -15047,18 +15047,18 @@ define void @s_shuffle_v4i16_v4i16__3_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__3_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) @@ -15116,21 +15116,21 @@ define void @s_shuffle_v4i16_v4i16__5_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__5_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__5_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -15171,21 +15171,21 @@ define void @s_shuffle_v4i16_v4i16__6_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__6_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__6_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -15226,21 +15226,21 @@ define void @s_shuffle_v4i16_v4i16__7_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -15281,21 +15281,21 @@ define void @s_shuffle_v4i16_v4i16__7_u_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s3, 16 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s3, 16 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -15338,22 +15338,22 @@ define void @s_shuffle_v4i16_v4i16__7_0_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -15396,22 +15396,22 @@ define void @s_shuffle_v4i16_v4i16__7_2_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -15452,21 +15452,21 @@ define void @s_shuffle_v4i16_v4i16__7_3_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -15509,22 +15509,22 @@ define void @s_shuffle_v4i16_v4i16__7_4_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s2 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -15565,21 +15565,21 @@ define void @s_shuffle_v4i16_v4i16__7_5_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s2 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s2 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -15622,22 +15622,22 @@ define void @s_shuffle_v4i16_v4i16__7_6_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_6_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_6_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s3 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -15678,21 +15678,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -15733,21 +15733,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -15788,21 +15788,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -15843,21 +15843,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -15898,21 +15898,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -15953,21 +15953,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s9, s2, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s9, s2, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -16008,21 +16008,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s2, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s2, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -16063,21 +16063,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_6_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s9, s3, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s9, s3, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -16112,18 +16112,18 @@ define void @s_shuffle_v4i16_v4i16__u_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__u_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__u_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) @@ -16157,18 +16157,18 @@ define void @s_shuffle_v4i16_v4i16__0_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__0_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) @@ -16204,19 +16204,19 @@ define void @s_shuffle_v4i16_v4i16__1_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__1_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) @@ -16250,18 +16250,18 @@ define void @s_shuffle_v4i16_v4i16__2_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__2_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__2_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) @@ -16297,19 +16297,19 @@ define void @s_shuffle_v4i16_v4i16__3_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__3_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__3_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) @@ -16343,18 +16343,18 @@ define void @s_shuffle_v4i16_v4i16__4_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__4_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__4_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) @@ -16396,22 +16396,22 @@ define void @s_shuffle_v4i16_v4i16__5_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__5_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__5_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -16452,21 +16452,21 @@ define void @s_shuffle_v4i16_v4i16__6_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__6_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__6_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -16509,22 +16509,22 @@ define void @s_shuffle_v4i16_v4i16__7_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -16565,21 +16565,21 @@ define void @s_shuffle_v4i16_v4i16__7_u_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -16622,22 +16622,22 @@ define void @s_shuffle_v4i16_v4i16__7_0_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -16678,21 +16678,21 @@ define void @s_shuffle_v4i16_v4i16__7_1_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -16733,21 +16733,21 @@ define void @s_shuffle_v4i16_v4i16__7_3_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -16790,22 +16790,22 @@ define void @s_shuffle_v4i16_v4i16__7_4_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -16846,21 +16846,21 @@ define void @s_shuffle_v4i16_v4i16__7_5_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s2 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s2 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -16903,22 +16903,22 @@ define void @s_shuffle_v4i16_v4i16__7_6_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_6_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_6_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -16959,21 +16959,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -17014,21 +17014,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: s_lshl_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: s_lshl_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -17069,21 +17069,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -17126,22 +17126,22 @@ define void @s_shuffle_v4i16_v4i16__7_7_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -17184,22 +17184,22 @@ define void @s_shuffle_v4i16_v4i16__7_7_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -17240,21 +17240,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -17297,22 +17297,22 @@ define void @s_shuffle_v4i16_v4i16__7_7_5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s2, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s2, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -17353,21 +17353,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_6_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -17402,18 +17402,18 @@ define void @s_shuffle_v4i16_v4i16__u_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__u_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__u_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s1, s1 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) @@ -17447,18 +17447,18 @@ define void @s_shuffle_v4i16_v4i16__0_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__0_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) @@ -17492,18 +17492,18 @@ define void @s_shuffle_v4i16_v4i16__1_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) @@ -17537,18 +17537,18 @@ define void @s_shuffle_v4i16_v4i16__2_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__2_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s1, s1 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) @@ -17582,18 +17582,18 @@ define void @s_shuffle_v4i16_v4i16__3_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__3_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__3_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) @@ -17627,18 +17627,18 @@ define void @s_shuffle_v4i16_v4i16__4_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__4_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__4_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s1, s1 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) @@ -17678,21 +17678,21 @@ define void @s_shuffle_v4i16_v4i16__5_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__5_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__5_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -17733,21 +17733,21 @@ define void @s_shuffle_v4i16_v4i16__6_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__6_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__6_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -17788,21 +17788,21 @@ define void @s_shuffle_v4i16_v4i16__7_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -17843,21 +17843,21 @@ define void @s_shuffle_v4i16_v4i16__7_u_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s3, 16 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s3, 16 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -17900,22 +17900,22 @@ define void @s_shuffle_v4i16_v4i16__7_0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s3, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s3, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -17956,21 +17956,21 @@ define void @s_shuffle_v4i16_v4i16__7_1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -18013,22 +18013,22 @@ define void @s_shuffle_v4i16_v4i16__7_2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -18071,22 +18071,22 @@ define void @s_shuffle_v4i16_v4i16__7_4_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s2 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -18127,21 +18127,21 @@ define void @s_shuffle_v4i16_v4i16__7_5_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s2 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s2 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -18184,22 +18184,22 @@ define void @s_shuffle_v4i16_v4i16__7_6_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_6_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_6_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -18240,21 +18240,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s1, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -18293,20 +18293,20 @@ define void @s_shuffle_v4i16_v4i16__7_7_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -18347,21 +18347,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -18402,21 +18402,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -18455,20 +18455,20 @@ define void @s_shuffle_v4i16_v4i16__7_7_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -18509,21 +18509,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s9, s2, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s9, s2, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -18564,21 +18564,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s2, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s2, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -18619,21 +18619,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_6_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s9, s3, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s9, s3, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -18678,17 +18678,17 @@ define void @s_shuffle_v4i16_v4i16__0_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__0_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__0_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) @@ -18720,17 +18720,17 @@ define void @s_shuffle_v4i16_v4i16__1_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__1_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__1_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) @@ -18762,17 +18762,17 @@ define void @s_shuffle_v4i16_v4i16__2_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__2_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__2_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) @@ -18804,17 +18804,17 @@ define void @s_shuffle_v4i16_v4i16__3_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__3_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__3_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) @@ -18864,19 +18864,19 @@ define void @s_shuffle_v4i16_v4i16__5_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__5_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__5_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -18911,18 +18911,18 @@ define void @s_shuffle_v4i16_v4i16__6_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__6_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__6_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -18959,19 +18959,19 @@ define void @s_shuffle_v4i16_v4i16__7_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -19006,18 +19006,18 @@ define void @s_shuffle_v4i16_v4i16__7_u_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -19060,22 +19060,22 @@ define void @s_shuffle_v4i16_v4i16__7_0_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -19116,21 +19116,21 @@ define void @s_shuffle_v4i16_v4i16__7_1_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -19173,22 +19173,22 @@ define void @s_shuffle_v4i16_v4i16__7_2_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -19229,21 +19229,21 @@ define void @s_shuffle_v4i16_v4i16__7_3_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -19278,18 +19278,18 @@ define void @s_shuffle_v4i16_v4i16__7_5_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -19326,19 +19326,19 @@ define void @s_shuffle_v4i16_v4i16__7_6_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_6_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_6_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -19373,18 +19373,18 @@ define void @s_shuffle_v4i16_v4i16__7_7_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -19419,18 +19419,18 @@ define void @s_shuffle_v4i16_v4i16__7_7_u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshl_b32 s9, s0, 16 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshl_b32 s9, s0, 16 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -19471,21 +19471,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s2 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s2 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -19528,22 +19528,22 @@ define void @s_shuffle_v4i16_v4i16__7_7_1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s2 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s2 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -19584,21 +19584,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s2 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s2 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -19641,22 +19641,22 @@ define void @s_shuffle_v4i16_v4i16__7_7_3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s2 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s2 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -19693,19 +19693,19 @@ define void @s_shuffle_v4i16_v4i16__7_7_5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s2, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -19740,18 +19740,18 @@ define void @s_shuffle_v4i16_v4i16__7_7_6_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -19811,21 +19811,21 @@ define void @s_shuffle_v4i16_v4i16__0_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__0_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s2 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__0_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s0, s2 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -19866,21 +19866,21 @@ define void @s_shuffle_v4i16_v4i16__1_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__1_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s2 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__1_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s2 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -19921,21 +19921,21 @@ define void @s_shuffle_v4i16_v4i16__2_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__2_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s2 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__2_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s1, s2 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -19976,21 +19976,21 @@ define void @s_shuffle_v4i16_v4i16__3_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__3_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s2 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__3_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s2 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -20044,18 +20044,18 @@ define void @s_shuffle_v4i16_v4i16__5_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__5_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__5_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s0 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -20090,18 +20090,18 @@ define void @s_shuffle_v4i16_v4i16__6_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__6_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__6_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -20136,18 +20136,18 @@ define void @s_shuffle_v4i16_v4i16__7_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -20182,18 +20182,18 @@ define void @s_shuffle_v4i16_v4i16__7_u_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -20236,22 +20236,22 @@ define void @s_shuffle_v4i16_v4i16__7_0_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -20292,21 +20292,21 @@ define void @s_shuffle_v4i16_v4i16__7_1_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -20349,22 +20349,22 @@ define void @s_shuffle_v4i16_v4i16__7_2_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -20405,21 +20405,21 @@ define void @s_shuffle_v4i16_v4i16__7_3_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s2, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s2, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -20456,19 +20456,19 @@ define void @s_shuffle_v4i16_v4i16__7_4_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -20505,19 +20505,19 @@ define void @s_shuffle_v4i16_v4i16__7_6_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_6_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_6_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -20552,18 +20552,18 @@ define void @s_shuffle_v4i16_v4i16__7_7_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -20598,18 +20598,18 @@ define void @s_shuffle_v4i16_v4i16__7_7_u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -20650,21 +20650,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s9, s0, s2 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s9, s0, s2 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -20705,21 +20705,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s2 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s2 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -20760,21 +20760,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s9, s1, s2 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s9, s1, s2 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -20815,21 +20815,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s2 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s1, s2 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -20864,18 +20864,18 @@ define void @s_shuffle_v4i16_v4i16__7_7_4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -20910,18 +20910,18 @@ define void @s_shuffle_v4i16_v4i16__7_7_6_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s9, s1, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s9, s1, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -20956,18 +20956,18 @@ define void @s_shuffle_v4i16_v4i16__u_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__u_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: s_lshl_b32 s8, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__u_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: s_lshl_b32 s8, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -21008,21 +21008,21 @@ define void @s_shuffle_v4i16_v4i16__0_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__0_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__0_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -21065,22 +21065,22 @@ define void @s_shuffle_v4i16_v4i16__1_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__1_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__1_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -21121,21 +21121,21 @@ define void @s_shuffle_v4i16_v4i16__2_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__2_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__2_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -21178,22 +21178,22 @@ define void @s_shuffle_v4i16_v4i16__3_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__3_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__3_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s3 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -21228,18 +21228,18 @@ define void @s_shuffle_v4i16_v4i16__4_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__4_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__4_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -21276,19 +21276,19 @@ define void @s_shuffle_v4i16_v4i16__5_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__5_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__5_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -21323,18 +21323,18 @@ define void @s_shuffle_v4i16_v4i16__6_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__6_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_mov_b32 s9, s8 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__6_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_mov_b32 s9, s8 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -21371,19 +21371,19 @@ define void @s_shuffle_v4i16_v4i16__7_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -21418,18 +21418,18 @@ define void @s_shuffle_v4i16_v4i16__7_u_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_u_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_u_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -21472,22 +21472,22 @@ define void @s_shuffle_v4i16_v4i16__7_0_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_0_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_0_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -21528,21 +21528,21 @@ define void @s_shuffle_v4i16_v4i16__7_1_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_1_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_1_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -21585,22 +21585,22 @@ define void @s_shuffle_v4i16_v4i16__7_2_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_2_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_2_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -21641,21 +21641,21 @@ define void @s_shuffle_v4i16_v4i16__7_3_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_3_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_3_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -21692,19 +21692,19 @@ define void @s_shuffle_v4i16_v4i16__7_4_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_4_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_4_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -21739,18 +21739,18 @@ define void @s_shuffle_v4i16_v4i16__7_5_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_5_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_5_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -21785,18 +21785,18 @@ define void @s_shuffle_v4i16_v4i16__7_7_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -21831,18 +21831,18 @@ define void @s_shuffle_v4i16_v4i16__7_7_u_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: s_lshl_b32 s9, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: s_lshl_b32 s9, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -21883,21 +21883,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_0_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s3 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s3 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -21940,22 +21940,22 @@ define void @s_shuffle_v4i16_v4i16__7_7_1_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s3 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s3 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -21996,21 +21996,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_2_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s3 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s1, s3 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -22053,22 +22053,22 @@ define void @s_shuffle_v4i16_v4i16__7_7_3_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s3 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s3 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -22103,18 +22103,18 @@ define void @s_shuffle_v4i16_v4i16__7_7_4_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -22151,19 +22151,19 @@ define void @s_shuffle_v4i16_v4i16__7_7_5_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s0, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s0, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -22198,18 +22198,18 @@ define void @s_shuffle_v4i16_v4i16__u_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__u_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__u_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s1, s1 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -22250,21 +22250,21 @@ define void @s_shuffle_v4i16_v4i16__0_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__0_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s3 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__0_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s0, s3 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -22305,21 +22305,21 @@ define void @s_shuffle_v4i16_v4i16__1_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__1_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s3 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__1_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s3 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -22360,21 +22360,21 @@ define void @s_shuffle_v4i16_v4i16__2_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__2_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s3 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__2_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s1, s3 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -22415,21 +22415,21 @@ define void @s_shuffle_v4i16_v4i16__3_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__3_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s3 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__3_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s3 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -22464,18 +22464,18 @@ define void @s_shuffle_v4i16_v4i16__4_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__4_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__4_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -22510,18 +22510,18 @@ define void @s_shuffle_v4i16_v4i16__5_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__5_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__5_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -22556,18 +22556,18 @@ define void @s_shuffle_v4i16_v4i16__6_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__6_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__6_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s1, s1 +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -22602,18 +22602,18 @@ define void @s_shuffle_v4i16_v4i16__7_u_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_u_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s8, s1, 16 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_u_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s8, s1, 16 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -22656,22 +22656,22 @@ define void @s_shuffle_v4i16_v4i16__7_0_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_0_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s1, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_0_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s1, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -22712,21 +22712,21 @@ define void @s_shuffle_v4i16_v4i16__7_1_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_1_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_1_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -22769,22 +22769,22 @@ define void @s_shuffle_v4i16_v4i16__7_2_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_2_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s3, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_2_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s3, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -22825,21 +22825,21 @@ define void @s_shuffle_v4i16_v4i16__7_3_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_3_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_3_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -22876,19 +22876,19 @@ define void @s_shuffle_v4i16_v4i16__7_4_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_4_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s2, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_4_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s2, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s2, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -22923,18 +22923,18 @@ define void @s_shuffle_v4i16_v4i16__7_5_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_5_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_5_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s0 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -22971,19 +22971,19 @@ define void @s_shuffle_v4i16_v4i16__7_6_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_6_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_lshr_b32 s0, s1, 16 -; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_6_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_lshr_b32 s0, s1, 16 +; GFX942-NEXT: s_pack_ll_b32_b16 s8, s0, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -23043,21 +23043,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_0_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s9, s0, s3 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s9, s0, s3 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -23098,21 +23098,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_1_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s3 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s3 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -23153,21 +23153,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_2_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s9, s1, s3 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s9, s1, s3 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -23208,21 +23208,21 @@ define void @s_shuffle_v4i16_v4i16__7_7_3_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s3 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s1, s3 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s3, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -23257,18 +23257,18 @@ define void @s_shuffle_v4i16_v4i16__7_7_4_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_lh_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_lh_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> @@ -23303,18 +23303,18 @@ define void @s_shuffle_v4i16_v4i16__7_7_5_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s1 -; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i16_v4i16__7_7_5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_pack_hh_b32_b16 s9, s0, s1 +; GFX942-NEXT: s_pack_hh_b32_b16 s8, s1, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i16> asm "; def $0", "=s"() %vec1 = call <4 x i16> asm "; def $0", "=s"() %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v2i32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v2i32.ll index 94c61f5ad0e86..9d3affa6da266 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v2i32.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v2i32.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v4i32_v2i32__u_u_u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v4i32_v2i32__0_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -79,17 +79,17 @@ define void @v_shuffle_v4i32_v2i32__1_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -132,17 +132,17 @@ define void @v_shuffle_v4i32_v2i32__3_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -181,21 +181,21 @@ define void @v_shuffle_v4i32_v2i32__3_0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -234,21 +234,21 @@ define void @v_shuffle_v4i32_v2i32__3_1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -282,19 +282,19 @@ define void @v_shuffle_v4i32_v2i32__3_2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -327,17 +327,17 @@ define void @v_shuffle_v4i32_v2i32__3_3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -376,20 +376,20 @@ define void @v_shuffle_v4i32_v2i32__3_3_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_3_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_3_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -429,21 +429,21 @@ define void @v_shuffle_v4i32_v2i32__3_3_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_3_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_3_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -478,19 +478,19 @@ define void @v_shuffle_v4i32_v2i32__3_3_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_3_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_3_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -525,18 +525,18 @@ define void @v_shuffle_v4i32_v2i32__3_3_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_3_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_3_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -577,21 +577,21 @@ define void @v_shuffle_v4i32_v2i32__3_3_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_3_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_3_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -632,21 +632,21 @@ define void @v_shuffle_v4i32_v2i32__3_3_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_3_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_3_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -682,18 +682,18 @@ define void @v_shuffle_v4i32_v2i32__3_3_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_3_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_3_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -730,19 +730,19 @@ define void @v_shuffle_v4i32_v2i32__3_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -779,19 +779,19 @@ define void @v_shuffle_v4i32_v2i32__u_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -827,19 +827,19 @@ define void @v_shuffle_v4i32_v2i32__0_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__0_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <4 x i32> zeroinitializer store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -874,18 +874,18 @@ define void @v_shuffle_v4i32_v2i32__1_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -921,19 +921,19 @@ define void @v_shuffle_v4i32_v2i32__2_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -974,21 +974,21 @@ define void @v_shuffle_v4i32_v2i32__3_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -1029,21 +1029,21 @@ define void @v_shuffle_v4i32_v2i32__3_u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -1086,22 +1086,22 @@ define void @v_shuffle_v4i32_v2i32__3_1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -1142,21 +1142,21 @@ define void @v_shuffle_v4i32_v2i32__3_2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -1197,21 +1197,21 @@ define void @v_shuffle_v4i32_v2i32__3_3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -1252,21 +1252,21 @@ define void @v_shuffle_v4i32_v2i32__3_3_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_3_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_3_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -1307,21 +1307,21 @@ define void @v_shuffle_v4i32_v2i32__3_3_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_3_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_3_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -1364,22 +1364,22 @@ define void @v_shuffle_v4i32_v2i32__3_3_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_3_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_3_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -1414,19 +1414,19 @@ define void @v_shuffle_v4i32_v2i32__u_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__u_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__u_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1460,19 +1460,19 @@ define void @v_shuffle_v4i32_v2i32__0_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__0_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__0_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1508,19 +1508,19 @@ define void @v_shuffle_v4i32_v2i32__1_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__1_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__1_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1554,19 +1554,19 @@ define void @v_shuffle_v4i32_v2i32__2_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__2_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__2_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1608,22 +1608,22 @@ define void @v_shuffle_v4i32_v2i32__3_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -1664,21 +1664,21 @@ define void @v_shuffle_v4i32_v2i32__3_u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -1719,21 +1719,21 @@ define void @v_shuffle_v4i32_v2i32__3_0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -1774,21 +1774,21 @@ define void @v_shuffle_v4i32_v2i32__3_2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -1829,21 +1829,21 @@ define void @v_shuffle_v4i32_v2i32__3_3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -1882,20 +1882,20 @@ define void @v_shuffle_v4i32_v2i32__3_3_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_3_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_3_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -1934,20 +1934,20 @@ define void @v_shuffle_v4i32_v2i32__3_3_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_3_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_3_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -1990,22 +1990,22 @@ define void @v_shuffle_v4i32_v2i32__3_3_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_3_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_3_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -2047,16 +2047,16 @@ define void @v_shuffle_v4i32_v2i32__0_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__0_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2088,17 +2088,17 @@ define void @v_shuffle_v4i32_v2i32__1_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__1_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2144,18 +2144,18 @@ define void @v_shuffle_v4i32_v2i32__3_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -2190,18 +2190,18 @@ define void @v_shuffle_v4i32_v2i32__3_u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -2242,21 +2242,21 @@ define void @v_shuffle_v4i32_v2i32__3_0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -2297,21 +2297,21 @@ define void @v_shuffle_v4i32_v2i32__3_1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -2348,19 +2348,19 @@ define void @v_shuffle_v4i32_v2i32__3_3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -2397,19 +2397,19 @@ define void @v_shuffle_v4i32_v2i32__3_3_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_3_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_3_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -2451,23 +2451,23 @@ define void @v_shuffle_v4i32_v2i32__3_3_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_3_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_3_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -2508,21 +2508,21 @@ define void @v_shuffle_v4i32_v2i32__3_3_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_3_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_3_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -2557,19 +2557,19 @@ define void @v_shuffle_v4i32_v2i32__u_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__u_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__u_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -2610,21 +2610,21 @@ define void @v_shuffle_v4i32_v2i32__0_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__0_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -2667,22 +2667,22 @@ define void @v_shuffle_v4i32_v2i32__1_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -2717,19 +2717,19 @@ define void @v_shuffle_v4i32_v2i32__2_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__2_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -2764,18 +2764,18 @@ define void @v_shuffle_v4i32_v2i32__3_u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -2816,21 +2816,21 @@ define void @v_shuffle_v4i32_v2i32__3_0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -2871,21 +2871,21 @@ define void @v_shuffle_v4i32_v2i32__3_1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -2920,18 +2920,18 @@ define void @v_shuffle_v4i32_v2i32__3_2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -2966,18 +2966,18 @@ define void @v_shuffle_v4i32_v2i32__3_3_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_3_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_3_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -3018,21 +3018,21 @@ define void @v_shuffle_v4i32_v2i32__3_3_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_3_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_3_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -3074,22 +3074,22 @@ define void @v_shuffle_v4i32_v2i32__3_3_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_3_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_3_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -3124,19 +3124,19 @@ define void @v_shuffle_v4i32_v2i32__3_3_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v2i32__3_3_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v2i32__3_3_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=v"() %vec1 = call <2 x i32> asm "; def $0", "=v"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -3181,17 +3181,17 @@ define void @s_shuffle_v4i32_v2i32__0_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v2i32__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v2i32__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) @@ -3223,17 +3223,17 @@ define void @s_shuffle_v4i32_v2i32__1_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v2i32__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v2i32__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) @@ -3279,17 +3279,17 @@ define void @s_shuffle_v4i32_v2i32__3_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v2i32__3_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v2i32__3_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -3330,21 +3330,21 @@ define void @s_shuffle_v4i32_v2i32__3_0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v2i32__3_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v2i32__3_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -3383,20 +3383,20 @@ define void @s_shuffle_v4i32_v2i32__3_1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v2i32__3_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v2i32__3_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -3431,18 +3431,18 @@ define void @s_shuffle_v4i32_v2i32__3_2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v2i32__3_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v2i32__3_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -3524,21 +3524,21 @@ define void @s_shuffle_v4i32_v2i32__3_3_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v2i32__3_3_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s9 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v2i32__3_3_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s9 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -3621,22 +3621,22 @@ define void @s_shuffle_v4i32_v2i32__3_3_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v2i32__3_3_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s9 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v2i32__3_3_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s9 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -3698,20 +3698,20 @@ define void @s_shuffle_v4i32_v2i32__3_3_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v2i32__3_3_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v2i32__3_3_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -3855,22 +3855,22 @@ define void @s_shuffle_v4i32_v2i32__3_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v2i32__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[10:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v2i32__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[10:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -3911,21 +3911,21 @@ define void @s_shuffle_v4i32_v2i32__3_u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v2i32__3_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[10:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v2i32__3_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[10:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -3968,22 +3968,22 @@ define void @s_shuffle_v4i32_v2i32__3_1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v2i32__3_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[10:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v2i32__3_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[10:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -4026,22 +4026,22 @@ define void @s_shuffle_v4i32_v2i32__3_2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v2i32__3_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[10:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v2i32__3_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[10:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -4105,21 +4105,21 @@ define void @s_shuffle_v4i32_v2i32__3_3_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v2i32__3_3_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s9 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v2i32__3_3_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s9 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -4162,22 +4162,22 @@ define void @s_shuffle_v4i32_v2i32__3_3_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v2i32__3_3_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s9 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v2i32__3_3_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s9 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -4220,22 +4220,22 @@ define void @s_shuffle_v4i32_v2i32__3_3_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v2i32__3_3_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[10:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v2i32__3_3_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[10:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -4355,22 +4355,22 @@ define void @s_shuffle_v4i32_v2i32__3_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v2i32__3_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v2i32__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -4411,21 +4411,21 @@ define void @s_shuffle_v4i32_v2i32__3_u_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v2i32__3_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[10:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v2i32__3_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[10:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -4468,22 +4468,22 @@ define void @s_shuffle_v4i32_v2i32__3_0_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v2i32__3_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[10:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v2i32__3_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[10:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -4526,22 +4526,22 @@ define void @s_shuffle_v4i32_v2i32__3_2_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v2i32__3_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[10:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v2i32__3_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[10:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -4651,22 +4651,22 @@ define void @s_shuffle_v4i32_v2i32__3_3_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v2i32__3_3_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[10:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v2i32__3_3_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[10:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -4711,17 +4711,17 @@ define void @s_shuffle_v4i32_v2i32__0_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v2i32__0_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v2i32__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) @@ -4753,17 +4753,17 @@ define void @s_shuffle_v4i32_v2i32__1_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v2i32__1_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v2i32__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) @@ -4860,22 +4860,22 @@ define void @s_shuffle_v4i32_v2i32__3_0_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v2i32__3_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[10:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v2i32__3_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[10:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -4956,19 +4956,19 @@ define void @s_shuffle_v4i32_v2i32__3_3_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v2i32__3_3_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v2i32__3_3_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -5011,22 +5011,22 @@ define void @s_shuffle_v4i32_v2i32__3_3_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v2i32__3_3_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[10:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v2i32__3_3_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[10:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -5071,23 +5071,23 @@ define void @s_shuffle_v4i32_v2i32__3_3_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v2i32__3_3_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v2i32__3_3_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -5173,22 +5173,22 @@ define void @s_shuffle_v4i32_v2i32__1_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v2i32__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v2i32__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -5271,22 +5271,22 @@ define void @s_shuffle_v4i32_v2i32__3_0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v2i32__3_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[10:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v2i32__3_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[10:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> @@ -5416,22 +5416,22 @@ define void @s_shuffle_v4i32_v2i32__3_3_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v2i32__3_3_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s9 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v2i32__3_3_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s9 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i32> asm "; def $0", "=s"() %vec1 = call <2 x i32> asm "; def $0", "=s"() %shuf = shufflevector <2 x i32> %vec0, <2 x i32> %vec1, <4 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v3i32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v3i32.ll index 1b003a7c5d9bc..1a669adf2b635 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v3i32.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v3i32.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v4i32_v3i32__u_u_u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v4i32_v3i32__0_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -79,17 +79,17 @@ define void @v_shuffle_v4i32_v3i32__1_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -121,17 +121,17 @@ define void @v_shuffle_v4i32_v3i32__2_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__2_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__2_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -174,17 +174,17 @@ define void @v_shuffle_v4i32_v3i32__4_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__4_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__4_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -217,17 +217,17 @@ define void @v_shuffle_v4i32_v3i32__5_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -267,21 +267,21 @@ define void @v_shuffle_v4i32_v3i32__5_0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -320,21 +320,21 @@ define void @v_shuffle_v4i32_v3i32__5_1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -375,21 +375,21 @@ define void @v_shuffle_v4i32_v3i32__5_2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -423,19 +423,19 @@ define void @v_shuffle_v4i32_v3i32__5_3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -468,17 +468,17 @@ define void @v_shuffle_v4i32_v3i32__5_4_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -513,18 +513,18 @@ define void @v_shuffle_v4i32_v3i32__5_5_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -565,22 +565,22 @@ define void @v_shuffle_v4i32_v3i32__5_5_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -622,23 +622,23 @@ define void @v_shuffle_v4i32_v3i32__5_5_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -679,22 +679,22 @@ define void @v_shuffle_v4i32_v3i32__5_5_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -729,19 +729,19 @@ define void @v_shuffle_v4i32_v3i32__5_5_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -777,19 +777,19 @@ define void @v_shuffle_v4i32_v3i32__5_5_4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -824,18 +824,18 @@ define void @v_shuffle_v4i32_v3i32__5_5_5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -877,22 +877,22 @@ define void @v_shuffle_v4i32_v3i32__5_5_5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -933,22 +933,22 @@ define void @v_shuffle_v4i32_v3i32__5_5_5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -990,22 +990,22 @@ define void @v_shuffle_v4i32_v3i32__5_5_5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -1043,21 +1043,21 @@ define void @v_shuffle_v4i32_v3i32__5_5_5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -1094,19 +1094,19 @@ define void @v_shuffle_v4i32_v3i32__5_5_5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -1143,19 +1143,19 @@ define void @v_shuffle_v4i32_v3i32__5_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -1191,19 +1191,19 @@ define void @v_shuffle_v4i32_v3i32__u_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1239,19 +1239,19 @@ define void @v_shuffle_v4i32_v3i32__0_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__0_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> zeroinitializer store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1287,20 +1287,20 @@ define void @v_shuffle_v4i32_v3i32__1_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1337,21 +1337,21 @@ define void @v_shuffle_v4i32_v3i32__2_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1386,19 +1386,19 @@ define void @v_shuffle_v4i32_v3i32__3_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1440,23 +1440,23 @@ define void @v_shuffle_v4i32_v3i32__4_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__4_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__4_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -1500,23 +1500,23 @@ define void @v_shuffle_v4i32_v3i32__5_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -1559,22 +1559,22 @@ define void @v_shuffle_v4i32_v3i32__5_u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -1618,23 +1618,23 @@ define void @v_shuffle_v4i32_v3i32__5_1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -1678,23 +1678,23 @@ define void @v_shuffle_v4i32_v3i32__5_2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -1738,23 +1738,23 @@ define void @v_shuffle_v4i32_v3i32__5_3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v9, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:8] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v9, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v9, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:8] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v9, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -1796,22 +1796,22 @@ define void @v_shuffle_v4i32_v3i32__5_4_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -1855,23 +1855,23 @@ define void @v_shuffle_v4i32_v3i32__5_5_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -1914,22 +1914,22 @@ define void @v_shuffle_v4i32_v3i32__5_5_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -1972,23 +1972,23 @@ define void @v_shuffle_v4i32_v3i32__5_5_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -2032,23 +2032,23 @@ define void @v_shuffle_v4i32_v3i32__5_5_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -2092,23 +2092,23 @@ define void @v_shuffle_v4i32_v3i32__5_5_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v9, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:8] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v8 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: global_store_dwordx4 v9, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v9, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:8] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v8 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: global_store_dwordx4 v9, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -2151,23 +2151,23 @@ define void @v_shuffle_v4i32_v3i32__5_5_4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -2202,18 +2202,18 @@ define void @v_shuffle_v4i32_v3i32__u_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__u_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__u_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2247,18 +2247,18 @@ define void @v_shuffle_v4i32_v3i32__0_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__0_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__0_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2294,19 +2294,19 @@ define void @v_shuffle_v4i32_v3i32__1_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__1_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__1_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2342,19 +2342,19 @@ define void @v_shuffle_v4i32_v3i32__2_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__2_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__2_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2388,18 +2388,18 @@ define void @v_shuffle_v4i32_v3i32__3_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__3_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2441,23 +2441,23 @@ define void @v_shuffle_v4i32_v3i32__4_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__4_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__4_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -2500,23 +2500,23 @@ define void @v_shuffle_v4i32_v3i32__5_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -2558,23 +2558,23 @@ define void @v_shuffle_v4i32_v3i32__5_u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -2616,22 +2616,22 @@ define void @v_shuffle_v4i32_v3i32__5_0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -2674,23 +2674,23 @@ define void @v_shuffle_v4i32_v3i32__5_2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -2733,22 +2733,22 @@ define void @v_shuffle_v4i32_v3i32__5_3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -2789,22 +2789,22 @@ define void @v_shuffle_v4i32_v3i32__5_4_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -2847,23 +2847,23 @@ define void @v_shuffle_v4i32_v3i32__5_5_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -2905,22 +2905,22 @@ define void @v_shuffle_v4i32_v3i32__5_5_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -2961,22 +2961,22 @@ define void @v_shuffle_v4i32_v3i32__5_5_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -3019,23 +3019,23 @@ define void @v_shuffle_v4i32_v3i32__5_5_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -3078,23 +3078,23 @@ define void @v_shuffle_v4i32_v3i32__5_5_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -3137,23 +3137,23 @@ define void @v_shuffle_v4i32_v3i32__5_5_4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -3188,18 +3188,18 @@ define void @v_shuffle_v4i32_v3i32__u_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__u_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__u_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3233,18 +3233,18 @@ define void @v_shuffle_v4i32_v3i32__0_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__0_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3280,19 +3280,19 @@ define void @v_shuffle_v4i32_v3i32__1_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__1_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3328,19 +3328,19 @@ define void @v_shuffle_v4i32_v3i32__2_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__2_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__2_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3374,18 +3374,18 @@ define void @v_shuffle_v4i32_v3i32__3_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__3_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__3_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3427,22 +3427,22 @@ define void @v_shuffle_v4i32_v3i32__4_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__4_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__4_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -3485,22 +3485,22 @@ define void @v_shuffle_v4i32_v3i32__5_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -3541,21 +3541,21 @@ define void @v_shuffle_v4i32_v3i32__5_u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -3598,23 +3598,23 @@ define void @v_shuffle_v4i32_v3i32__5_0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:8] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:8] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -3655,21 +3655,21 @@ define void @v_shuffle_v4i32_v3i32__5_1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -3712,22 +3712,22 @@ define void @v_shuffle_v4i32_v3i32__5_3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -3769,22 +3769,22 @@ define void @v_shuffle_v4i32_v3i32__5_4_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -3827,22 +3827,22 @@ define void @v_shuffle_v4i32_v3i32__5_5_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -3884,22 +3884,22 @@ define void @v_shuffle_v4i32_v3i32__5_5_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -3942,22 +3942,22 @@ define void @v_shuffle_v4i32_v3i32__5_5_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:8] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v8 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:8] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v8 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -4000,23 +4000,23 @@ define void @v_shuffle_v4i32_v3i32__5_5_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -4060,23 +4060,23 @@ define void @v_shuffle_v4i32_v3i32__5_5_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:8] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v8 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:8] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v8 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -4120,24 +4120,24 @@ define void @v_shuffle_v4i32_v3i32__5_5_4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -4179,16 +4179,16 @@ define void @v_shuffle_v4i32_v3i32__0_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__0_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4220,17 +4220,17 @@ define void @v_shuffle_v4i32_v3i32__1_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4262,17 +4262,17 @@ define void @v_shuffle_v4i32_v3i32__2_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__2_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4319,20 +4319,20 @@ define void @v_shuffle_v4i32_v3i32__4_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__4_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__4_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -4370,21 +4370,21 @@ define void @v_shuffle_v4i32_v3i32__5_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -4421,20 +4421,20 @@ define void @v_shuffle_v4i32_v3i32__5_u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -4477,23 +4477,23 @@ define void @v_shuffle_v4i32_v3i32__5_0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -4535,23 +4535,23 @@ define void @v_shuffle_v4i32_v3i32__5_1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -4595,23 +4595,23 @@ define void @v_shuffle_v4i32_v3i32__5_2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -4649,21 +4649,21 @@ define void @v_shuffle_v4i32_v3i32__5_4_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -4701,21 +4701,21 @@ define void @v_shuffle_v4i32_v3i32__5_5_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -4752,19 +4752,19 @@ define void @v_shuffle_v4i32_v3i32__5_5_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -4806,23 +4806,23 @@ define void @v_shuffle_v4i32_v3i32__5_5_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -4864,23 +4864,23 @@ define void @v_shuffle_v4i32_v3i32__5_5_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -4922,23 +4922,23 @@ define void @v_shuffle_v4i32_v3i32__5_5_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -4975,19 +4975,19 @@ define void @v_shuffle_v4i32_v3i32__5_5_4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -5022,18 +5022,18 @@ define void @v_shuffle_v4i32_v3i32__u_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__u_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__u_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -5074,22 +5074,22 @@ define void @v_shuffle_v4i32_v3i32__0_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__0_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__0_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -5132,23 +5132,23 @@ define void @v_shuffle_v4i32_v3i32__1_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__1_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__1_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -5191,23 +5191,23 @@ define void @v_shuffle_v4i32_v3i32__2_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__2_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__2_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -5242,18 +5242,18 @@ define void @v_shuffle_v4i32_v3i32__3_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__3_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__3_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -5290,19 +5290,19 @@ define void @v_shuffle_v4i32_v3i32__4_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__4_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__4_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -5339,19 +5339,19 @@ define void @v_shuffle_v4i32_v3i32__5_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -5387,19 +5387,19 @@ define void @v_shuffle_v4i32_v3i32__5_u_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -5442,23 +5442,23 @@ define void @v_shuffle_v4i32_v3i32__5_0_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -5499,22 +5499,22 @@ define void @v_shuffle_v4i32_v3i32__5_1_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -5558,23 +5558,23 @@ define void @v_shuffle_v4i32_v3i32__5_2_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -5610,19 +5610,19 @@ define void @v_shuffle_v4i32_v3i32__5_3_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -5659,19 +5659,19 @@ define void @v_shuffle_v4i32_v3i32__5_5_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -5707,19 +5707,19 @@ define void @v_shuffle_v4i32_v3i32__5_5_u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -5762,23 +5762,23 @@ define void @v_shuffle_v4i32_v3i32__5_5_0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -5822,23 +5822,23 @@ define void @v_shuffle_v4i32_v3i32__5_5_1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -5881,23 +5881,23 @@ define void @v_shuffle_v4i32_v3i32__5_5_2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -5932,19 +5932,19 @@ define void @v_shuffle_v4i32_v3i32__5_5_3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -5979,18 +5979,18 @@ define void @v_shuffle_v4i32_v3i32__u_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__u_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__u_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -6032,23 +6032,23 @@ define void @v_shuffle_v4i32_v3i32__0_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__0_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__0_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -6091,23 +6091,23 @@ define void @v_shuffle_v4i32_v3i32__1_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__1_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__1_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -6150,23 +6150,23 @@ define void @v_shuffle_v4i32_v3i32__2_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__2_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__2_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -6201,18 +6201,18 @@ define void @v_shuffle_v4i32_v3i32__3_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__3_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__3_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -6249,19 +6249,19 @@ define void @v_shuffle_v4i32_v3i32__4_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__4_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__4_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -6296,18 +6296,18 @@ define void @v_shuffle_v4i32_v3i32__5_u_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -6350,22 +6350,22 @@ define void @v_shuffle_v4i32_v3i32__5_0_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -6408,23 +6408,23 @@ define void @v_shuffle_v4i32_v3i32__5_1_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -6467,23 +6467,23 @@ define void @v_shuffle_v4i32_v3i32__5_2_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -6520,20 +6520,20 @@ define void @v_shuffle_v4i32_v3i32__5_3_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -6568,18 +6568,18 @@ define void @v_shuffle_v4i32_v3i32__5_4_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -6615,19 +6615,19 @@ define void @v_shuffle_v4i32_v3i32__5_5_u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -6670,23 +6670,23 @@ define void @v_shuffle_v4i32_v3i32__5_5_0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v6 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v6 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -6729,23 +6729,23 @@ define void @v_shuffle_v4i32_v3i32__5_5_1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -6788,23 +6788,23 @@ define void @v_shuffle_v4i32_v3i32__5_5_2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v6 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v6 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -6841,19 +6841,19 @@ define void @v_shuffle_v4i32_v3i32__5_5_3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -6889,19 +6889,19 @@ define void @v_shuffle_v4i32_v3i32__5_5_4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=v"() %vec1 = call <3 x i32> asm "; def $0", "=v"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -6946,17 +6946,17 @@ define void @s_shuffle_v4i32_v3i32__0_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) @@ -6988,17 +6988,17 @@ define void @s_shuffle_v4i32_v3i32__1_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) @@ -7030,17 +7030,17 @@ define void @s_shuffle_v4i32_v3i32__2_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__2_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__2_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) @@ -7086,17 +7086,17 @@ define void @s_shuffle_v4i32_v3i32__4_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__4_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__4_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -7129,17 +7129,17 @@ define void @s_shuffle_v4i32_v3i32__5_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -7180,21 +7180,21 @@ define void @s_shuffle_v4i32_v3i32__5_0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -7233,20 +7233,20 @@ define void @s_shuffle_v4i32_v3i32__5_1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -7287,21 +7287,21 @@ define void @s_shuffle_v4i32_v3i32__5_2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -7336,18 +7336,18 @@ define void @s_shuffle_v4i32_v3i32__5_3_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -7401,18 +7401,18 @@ define void @s_shuffle_v4i32_v3i32__5_5_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -7455,22 +7455,22 @@ define void @s_shuffle_v4i32_v3i32__5_5_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -7513,22 +7513,22 @@ define void @s_shuffle_v4i32_v3i32__5_5_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -7569,21 +7569,21 @@ define void @s_shuffle_v4i32_v3i32__5_5_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -7620,19 +7620,19 @@ define void @s_shuffle_v4i32_v3i32__5_5_3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -7669,19 +7669,19 @@ define void @s_shuffle_v4i32_v3i32__5_5_4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -7744,22 +7744,22 @@ define void @s_shuffle_v4i32_v3i32__5_5_5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -7802,22 +7802,22 @@ define void @s_shuffle_v4i32_v3i32__5_5_5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -7860,22 +7860,22 @@ define void @s_shuffle_v4i32_v3i32__5_5_5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -7914,20 +7914,20 @@ define void @s_shuffle_v4i32_v3i32__5_5_5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -7966,20 +7966,20 @@ define void @s_shuffle_v4i32_v3i32__5_5_5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -8037,19 +8037,19 @@ define void @s_shuffle_v4i32_v3i32__u_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) @@ -8107,20 +8107,20 @@ define void @s_shuffle_v4i32_v3i32__1_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) @@ -8158,20 +8158,20 @@ define void @s_shuffle_v4i32_v3i32__2_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) @@ -8207,19 +8207,19 @@ define void @s_shuffle_v4i32_v3i32__3_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) @@ -8263,23 +8263,23 @@ define void @s_shuffle_v4i32_v3i32__4_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__4_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__4_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -8324,23 +8324,23 @@ define void @s_shuffle_v4i32_v3i32__5_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -8383,22 +8383,22 @@ define void @s_shuffle_v4i32_v3i32__5_u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -8443,23 +8443,23 @@ define void @s_shuffle_v4i32_v3i32__5_1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -8504,23 +8504,23 @@ define void @s_shuffle_v4i32_v3i32__5_2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -8565,23 +8565,23 @@ define void @s_shuffle_v4i32_v3i32__5_3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -8624,22 +8624,22 @@ define void @s_shuffle_v4i32_v3i32__5_4_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -8684,23 +8684,23 @@ define void @s_shuffle_v4i32_v3i32__5_5_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -8743,22 +8743,22 @@ define void @s_shuffle_v4i32_v3i32__5_5_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -8803,23 +8803,23 @@ define void @s_shuffle_v4i32_v3i32__5_5_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -8864,23 +8864,23 @@ define void @s_shuffle_v4i32_v3i32__5_5_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -8925,23 +8925,23 @@ define void @s_shuffle_v4i32_v3i32__5_5_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -8986,23 +8986,23 @@ define void @s_shuffle_v4i32_v3i32__5_5_4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -9142,22 +9142,22 @@ define void @s_shuffle_v4i32_v3i32__4_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__4_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__4_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -9200,22 +9200,22 @@ define void @s_shuffle_v4i32_v3i32__5_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -9258,22 +9258,22 @@ define void @s_shuffle_v4i32_v3i32__5_u_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -9318,23 +9318,23 @@ define void @s_shuffle_v4i32_v3i32__5_0_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -9379,23 +9379,23 @@ define void @s_shuffle_v4i32_v3i32__5_2_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -9440,23 +9440,23 @@ define void @s_shuffle_v4i32_v3i32__5_3_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -9499,22 +9499,22 @@ define void @s_shuffle_v4i32_v3i32__5_4_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -9559,23 +9559,23 @@ define void @s_shuffle_v4i32_v3i32__5_5_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -9618,22 +9618,22 @@ define void @s_shuffle_v4i32_v3i32__5_5_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -9678,23 +9678,23 @@ define void @s_shuffle_v4i32_v3i32__5_5_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -9739,23 +9739,23 @@ define void @s_shuffle_v4i32_v3i32__5_5_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -9800,23 +9800,23 @@ define void @s_shuffle_v4i32_v3i32__5_5_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -9861,23 +9861,23 @@ define void @s_shuffle_v4i32_v3i32__5_5_4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -10017,22 +10017,22 @@ define void @s_shuffle_v4i32_v3i32__4_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__4_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__4_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -10075,22 +10075,22 @@ define void @s_shuffle_v4i32_v3i32__5_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -10131,21 +10131,21 @@ define void @s_shuffle_v4i32_v3i32__5_u_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -10190,23 +10190,23 @@ define void @s_shuffle_v4i32_v3i32__5_0_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -10247,21 +10247,21 @@ define void @s_shuffle_v4i32_v3i32__5_1_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -10304,22 +10304,22 @@ define void @s_shuffle_v4i32_v3i32__5_3_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -10362,22 +10362,22 @@ define void @s_shuffle_v4i32_v3i32__5_4_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -10420,22 +10420,22 @@ define void @s_shuffle_v4i32_v3i32__5_5_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -10478,22 +10478,22 @@ define void @s_shuffle_v4i32_v3i32__5_5_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -10538,23 +10538,23 @@ define void @s_shuffle_v4i32_v3i32__5_5_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -10599,23 +10599,23 @@ define void @s_shuffle_v4i32_v3i32__5_5_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -10660,23 +10660,23 @@ define void @s_shuffle_v4i32_v3i32__5_5_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -10721,23 +10721,23 @@ define void @s_shuffle_v4i32_v3i32__5_5_4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -10782,17 +10782,17 @@ define void @s_shuffle_v4i32_v3i32__0_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__0_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) @@ -10824,17 +10824,17 @@ define void @s_shuffle_v4i32_v3i32__1_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) @@ -10866,17 +10866,17 @@ define void @s_shuffle_v4i32_v3i32__2_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__2_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) @@ -10928,20 +10928,20 @@ define void @s_shuffle_v4i32_v3i32__4_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__4_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__4_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -10980,20 +10980,20 @@ define void @s_shuffle_v4i32_v3i32__5_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -11030,19 +11030,19 @@ define void @s_shuffle_v4i32_v3i32__5_u_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -11087,23 +11087,23 @@ define void @s_shuffle_v4i32_v3i32__5_0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -11146,22 +11146,22 @@ define void @s_shuffle_v4i32_v3i32__5_1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -11206,23 +11206,23 @@ define void @s_shuffle_v4i32_v3i32__5_2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -11261,20 +11261,20 @@ define void @s_shuffle_v4i32_v3i32__5_4_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -11313,20 +11313,20 @@ define void @s_shuffle_v4i32_v3i32__5_5_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -11363,19 +11363,19 @@ define void @s_shuffle_v4i32_v3i32__5_5_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -11420,23 +11420,23 @@ define void @s_shuffle_v4i32_v3i32__5_5_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -11481,23 +11481,23 @@ define void @s_shuffle_v4i32_v3i32__5_5_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -11540,22 +11540,22 @@ define void @s_shuffle_v4i32_v3i32__5_5_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -11594,20 +11594,20 @@ define void @s_shuffle_v4i32_v3i32__5_5_4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -11670,22 +11670,22 @@ define void @s_shuffle_v4i32_v3i32__0_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__0_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__0_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -11728,22 +11728,22 @@ define void @s_shuffle_v4i32_v3i32__1_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__1_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__1_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -11786,22 +11786,22 @@ define void @s_shuffle_v4i32_v3i32__2_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__2_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__2_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -11900,19 +11900,19 @@ define void @s_shuffle_v4i32_v3i32__5_u_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -11957,23 +11957,23 @@ define void @s_shuffle_v4i32_v3i32__5_0_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -12016,22 +12016,22 @@ define void @s_shuffle_v4i32_v3i32__5_1_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -12076,23 +12076,23 @@ define void @s_shuffle_v4i32_v3i32__5_2_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -12131,20 +12131,20 @@ define void @s_shuffle_v4i32_v3i32__5_3_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -12183,20 +12183,20 @@ define void @s_shuffle_v4i32_v3i32__5_5_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -12233,19 +12233,19 @@ define void @s_shuffle_v4i32_v3i32__5_5_u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -12290,23 +12290,23 @@ define void @s_shuffle_v4i32_v3i32__5_5_0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -12351,23 +12351,23 @@ define void @s_shuffle_v4i32_v3i32__5_5_1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -12410,22 +12410,22 @@ define void @s_shuffle_v4i32_v3i32__5_5_2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -12464,20 +12464,20 @@ define void @s_shuffle_v4i32_v3i32__5_5_3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -12540,22 +12540,22 @@ define void @s_shuffle_v4i32_v3i32__0_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__0_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__0_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -12598,22 +12598,22 @@ define void @s_shuffle_v4i32_v3i32__1_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__1_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__1_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -12656,22 +12656,22 @@ define void @s_shuffle_v4i32_v3i32__2_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__2_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__2_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -12775,22 +12775,22 @@ define void @s_shuffle_v4i32_v3i32__5_0_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -12833,22 +12833,22 @@ define void @s_shuffle_v4i32_v3i32__5_1_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -12891,22 +12891,22 @@ define void @s_shuffle_v4i32_v3i32__5_2_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -12945,20 +12945,20 @@ define void @s_shuffle_v4i32_v3i32__5_3_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -13015,19 +13015,19 @@ define void @s_shuffle_v4i32_v3i32__5_5_u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -13072,23 +13072,23 @@ define void @s_shuffle_v4i32_v3i32__5_5_0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -13133,23 +13133,23 @@ define void @s_shuffle_v4i32_v3i32__5_5_1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -13192,22 +13192,22 @@ define void @s_shuffle_v4i32_v3i32__5_5_2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -13246,20 +13246,20 @@ define void @s_shuffle_v4i32_v3i32__5_5_3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> @@ -13298,20 +13298,20 @@ define void @s_shuffle_v4i32_v3i32__5_5_4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v3i32__5_5_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i32> asm "; def $0", "=s"() %vec1 = call <3 x i32> asm "; def $0", "=s"() %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v4i32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v4i32.ll index 47ad1c4bedb8b..983afa566e2c1 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v4i32.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v4i32.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v4i32_v4i32__u_u_u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v4i32_v4i32__0_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -79,17 +79,17 @@ define void @v_shuffle_v4i32_v4i32__1_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -121,17 +121,17 @@ define void @v_shuffle_v4i32_v4i32__2_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__2_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__2_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -163,17 +163,17 @@ define void @v_shuffle_v4i32_v4i32__3_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__3_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__3_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -216,17 +216,17 @@ define void @v_shuffle_v4i32_v4i32__5_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__5_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__5_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -259,17 +259,17 @@ define void @v_shuffle_v4i32_v4i32__6_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__6_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__6_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -302,17 +302,17 @@ define void @v_shuffle_v4i32_v4i32__7_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -352,21 +352,21 @@ define void @v_shuffle_v4i32_v4i32__7_0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -405,21 +405,21 @@ define void @v_shuffle_v4i32_v4i32__7_1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -459,21 +459,21 @@ define void @v_shuffle_v4i32_v4i32__7_2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -514,21 +514,21 @@ define void @v_shuffle_v4i32_v4i32__7_3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -562,17 +562,17 @@ define void @v_shuffle_v4i32_v4i32__7_4_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -605,17 +605,17 @@ define void @v_shuffle_v4i32_v4i32__7_5_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -649,17 +649,17 @@ define void @v_shuffle_v4i32_v4i32__7_6_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_6_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_6_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -694,18 +694,18 @@ define void @v_shuffle_v4i32_v4i32__7_7_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -748,23 +748,23 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -807,23 +807,23 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -864,22 +864,22 @@ define void @v_shuffle_v4i32_v4i32__7_7_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -922,22 +922,22 @@ define void @v_shuffle_v4i32_v4i32__7_7_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -973,18 +973,18 @@ define void @v_shuffle_v4i32_v4i32__7_7_4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -1019,18 +1019,18 @@ define void @v_shuffle_v4i32_v4i32__7_7_5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -1065,18 +1065,18 @@ define void @v_shuffle_v4i32_v4i32__7_7_6_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -1113,19 +1113,19 @@ define void @v_shuffle_v4i32_v4i32__7_7_7_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -1169,23 +1169,23 @@ define void @v_shuffle_v4i32_v4i32__7_7_7_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -1230,24 +1230,24 @@ define void @v_shuffle_v4i32_v4i32__7_7_7_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -1291,23 +1291,23 @@ define void @v_shuffle_v4i32_v4i32__7_7_7_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[6:7], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[6:7], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -1350,23 +1350,23 @@ define void @v_shuffle_v4i32_v4i32__7_7_7_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -1402,18 +1402,18 @@ define void @v_shuffle_v4i32_v4i32__7_7_7_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -1452,21 +1452,21 @@ define void @v_shuffle_v4i32_v4i32__7_7_7_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -1503,18 +1503,18 @@ define void @v_shuffle_v4i32_v4i32__7_7_7_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -1551,19 +1551,19 @@ define void @v_shuffle_v4i32_v4i32__7_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -1600,19 +1600,19 @@ define void @v_shuffle_v4i32_v4i32__u_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1648,19 +1648,19 @@ define void @v_shuffle_v4i32_v4i32__0_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__0_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> zeroinitializer store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1696,19 +1696,19 @@ define void @v_shuffle_v4i32_v4i32__1_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1745,19 +1745,19 @@ define void @v_shuffle_v4i32_v4i32__2_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1794,19 +1794,19 @@ define void @v_shuffle_v4i32_v4i32__3_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1842,19 +1842,19 @@ define void @v_shuffle_v4i32_v4i32__4_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__4_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__4_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1897,23 +1897,23 @@ define void @v_shuffle_v4i32_v4i32__5_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__5_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__5_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -1958,24 +1958,24 @@ define void @v_shuffle_v4i32_v4i32__6_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__6_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__6_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -2019,23 +2019,23 @@ define void @v_shuffle_v4i32_v4i32__7_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -2078,23 +2078,23 @@ define void @v_shuffle_v4i32_v4i32__7_u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -2139,24 +2139,24 @@ define void @v_shuffle_v4i32_v4i32__7_1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -2199,22 +2199,22 @@ define void @v_shuffle_v4i32_v4i32__7_2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[6:7], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[6:7], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -2258,22 +2258,22 @@ define void @v_shuffle_v4i32_v4i32__7_3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -2317,23 +2317,23 @@ define void @v_shuffle_v4i32_v4i32__7_4_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -2376,23 +2376,23 @@ define void @v_shuffle_v4i32_v4i32__7_5_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -2436,23 +2436,23 @@ define void @v_shuffle_v4i32_v4i32__7_6_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_6_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[4:5] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_6_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[4:5] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -2497,24 +2497,24 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -2557,23 +2557,23 @@ define void @v_shuffle_v4i32_v4i32__7_7_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -2617,23 +2617,23 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -2678,23 +2678,23 @@ define void @v_shuffle_v4i32_v4i32__7_7_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v0 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v0 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -2737,22 +2737,22 @@ define void @v_shuffle_v4i32_v4i32__7_7_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -2796,22 +2796,22 @@ define void @v_shuffle_v4i32_v4i32__7_7_4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -2854,23 +2854,23 @@ define void @v_shuffle_v4i32_v4i32__7_7_5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -2913,23 +2913,23 @@ define void @v_shuffle_v4i32_v4i32__7_7_6_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -2964,18 +2964,18 @@ define void @v_shuffle_v4i32_v4i32__u_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__u_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__u_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3009,18 +3009,18 @@ define void @v_shuffle_v4i32_v4i32__0_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__0_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__0_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3056,19 +3056,19 @@ define void @v_shuffle_v4i32_v4i32__1_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__1_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__1_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3104,19 +3104,19 @@ define void @v_shuffle_v4i32_v4i32__2_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__2_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__2_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3152,19 +3152,19 @@ define void @v_shuffle_v4i32_v4i32__3_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__3_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3198,18 +3198,18 @@ define void @v_shuffle_v4i32_v4i32__4_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__4_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__4_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3251,23 +3251,23 @@ define void @v_shuffle_v4i32_v4i32__5_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__5_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__5_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -3310,23 +3310,23 @@ define void @v_shuffle_v4i32_v4i32__6_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__6_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__6_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -3369,23 +3369,23 @@ define void @v_shuffle_v4i32_v4i32__7_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -3428,23 +3428,23 @@ define void @v_shuffle_v4i32_v4i32__7_u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -3488,23 +3488,23 @@ define void @v_shuffle_v4i32_v4i32__7_0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -3548,22 +3548,22 @@ define void @v_shuffle_v4i32_v4i32__7_2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[6:7], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[6:7], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -3606,22 +3606,22 @@ define void @v_shuffle_v4i32_v4i32__7_3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -3665,23 +3665,23 @@ define void @v_shuffle_v4i32_v4i32__7_4_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -3724,23 +3724,23 @@ define void @v_shuffle_v4i32_v4i32__7_5_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -3784,23 +3784,23 @@ define void @v_shuffle_v4i32_v4i32__7_6_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_6_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[4:5] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_6_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[4:5] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -3845,24 +3845,24 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -3905,23 +3905,23 @@ define void @v_shuffle_v4i32_v4i32__7_7_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -3966,24 +3966,24 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -4028,23 +4028,23 @@ define void @v_shuffle_v4i32_v4i32__7_7_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -4089,23 +4089,23 @@ define void @v_shuffle_v4i32_v4i32__7_7_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -4149,22 +4149,22 @@ define void @v_shuffle_v4i32_v4i32__7_7_4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -4207,22 +4207,22 @@ define void @v_shuffle_v4i32_v4i32__7_7_5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -4265,23 +4265,23 @@ define void @v_shuffle_v4i32_v4i32__7_7_6_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -4316,18 +4316,18 @@ define void @v_shuffle_v4i32_v4i32__u_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__u_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__u_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4361,18 +4361,18 @@ define void @v_shuffle_v4i32_v4i32__0_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__0_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4407,18 +4407,18 @@ define void @v_shuffle_v4i32_v4i32__1_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__1_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4454,19 +4454,19 @@ define void @v_shuffle_v4i32_v4i32__2_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__2_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__2_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4501,18 +4501,18 @@ define void @v_shuffle_v4i32_v4i32__3_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__3_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__3_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4546,18 +4546,18 @@ define void @v_shuffle_v4i32_v4i32__4_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__4_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__4_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4598,22 +4598,22 @@ define void @v_shuffle_v4i32_v4i32__5_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__5_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__5_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -4656,22 +4656,22 @@ define void @v_shuffle_v4i32_v4i32__6_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__6_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__6_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -4713,22 +4713,22 @@ define void @v_shuffle_v4i32_v4i32__7_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -4769,21 +4769,21 @@ define void @v_shuffle_v4i32_v4i32__7_u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -4826,21 +4826,21 @@ define void @v_shuffle_v4i32_v4i32__7_0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -4881,21 +4881,21 @@ define void @v_shuffle_v4i32_v4i32__7_1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -4938,22 +4938,22 @@ define void @v_shuffle_v4i32_v4i32__7_3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -4995,21 +4995,21 @@ define void @v_shuffle_v4i32_v4i32__7_4_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[4:5] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[4:5] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -5052,22 +5052,22 @@ define void @v_shuffle_v4i32_v4i32__7_5_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v2 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v2 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -5109,21 +5109,21 @@ define void @v_shuffle_v4i32_v4i32__7_6_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_6_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[6:7] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_6_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[6:7] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -5166,22 +5166,22 @@ define void @v_shuffle_v4i32_v4i32__7_7_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -5224,22 +5224,22 @@ define void @v_shuffle_v4i32_v4i32__7_7_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -5284,23 +5284,23 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v2 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v2 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -5344,22 +5344,22 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -5403,22 +5403,22 @@ define void @v_shuffle_v4i32_v4i32__7_7_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -5462,22 +5462,22 @@ define void @v_shuffle_v4i32_v4i32__7_7_4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v7 -; GFX940-NEXT: v_mov_b32_e32 v8, v4 -; GFX940-NEXT: v_mov_b32_e32 v9, v2 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v7 +; GFX942-NEXT: v_mov_b32_e32 v8, v4 +; GFX942-NEXT: v_mov_b32_e32 v9, v2 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -5520,23 +5520,23 @@ define void @v_shuffle_v4i32_v4i32__7_7_5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -5579,22 +5579,22 @@ define void @v_shuffle_v4i32_v4i32__7_7_6_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v7, v2 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v7, v2 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -5629,18 +5629,18 @@ define void @v_shuffle_v4i32_v4i32__u_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__u_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__u_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -5674,18 +5674,18 @@ define void @v_shuffle_v4i32_v4i32__0_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__0_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -5721,19 +5721,19 @@ define void @v_shuffle_v4i32_v4i32__1_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -5769,19 +5769,19 @@ define void @v_shuffle_v4i32_v4i32__2_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__2_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -5817,19 +5817,19 @@ define void @v_shuffle_v4i32_v4i32__3_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__3_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__3_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -5863,18 +5863,18 @@ define void @v_shuffle_v4i32_v4i32__4_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__4_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__4_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -5916,22 +5916,22 @@ define void @v_shuffle_v4i32_v4i32__5_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__5_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__5_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -5974,22 +5974,22 @@ define void @v_shuffle_v4i32_v4i32__6_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__6_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__6_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -6032,22 +6032,22 @@ define void @v_shuffle_v4i32_v4i32__7_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -6088,21 +6088,21 @@ define void @v_shuffle_v4i32_v4i32__7_u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -6145,21 +6145,21 @@ define void @v_shuffle_v4i32_v4i32__7_0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -6200,21 +6200,21 @@ define void @v_shuffle_v4i32_v4i32__7_1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -6256,22 +6256,22 @@ define void @v_shuffle_v4i32_v4i32__7_2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -6313,21 +6313,21 @@ define void @v_shuffle_v4i32_v4i32__7_4_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[4:5] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[4:5] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -6370,22 +6370,22 @@ define void @v_shuffle_v4i32_v4i32__7_5_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -6427,21 +6427,21 @@ define void @v_shuffle_v4i32_v4i32__7_6_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_6_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[6:7] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_6_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[6:7] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -6484,22 +6484,22 @@ define void @v_shuffle_v4i32_v4i32__7_7_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -6540,22 +6540,22 @@ define void @v_shuffle_v4i32_v4i32__7_7_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -6600,23 +6600,23 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -6661,23 +6661,23 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v1 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v1 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -6718,22 +6718,22 @@ define void @v_shuffle_v4i32_v4i32__7_7_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -6776,23 +6776,23 @@ define void @v_shuffle_v4i32_v4i32__7_7_4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -6835,23 +6835,23 @@ define void @v_shuffle_v4i32_v4i32__7_7_5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -6894,22 +6894,22 @@ define void @v_shuffle_v4i32_v4i32__7_7_6_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -6951,16 +6951,16 @@ define void @v_shuffle_v4i32_v4i32__0_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__0_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__0_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -6992,17 +6992,17 @@ define void @v_shuffle_v4i32_v4i32__1_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__1_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__1_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -7034,17 +7034,17 @@ define void @v_shuffle_v4i32_v4i32__2_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__2_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__2_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -7076,17 +7076,17 @@ define void @v_shuffle_v4i32_v4i32__3_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__3_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__3_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 @@ -7133,19 +7133,19 @@ define void @v_shuffle_v4i32_v4i32__5_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__5_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__5_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -7183,19 +7183,19 @@ define void @v_shuffle_v4i32_v4i32__6_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__6_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__6_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -7233,19 +7233,19 @@ define void @v_shuffle_v4i32_v4i32__7_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -7282,19 +7282,19 @@ define void @v_shuffle_v4i32_v4i32__7_u_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -7338,23 +7338,23 @@ define void @v_shuffle_v4i32_v4i32__7_0_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v2 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v2 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -7396,23 +7396,23 @@ define void @v_shuffle_v4i32_v4i32__7_1_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -7455,23 +7455,23 @@ define void @v_shuffle_v4i32_v4i32__7_2_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -7516,23 +7516,23 @@ define void @v_shuffle_v4i32_v4i32__7_3_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -7571,20 +7571,20 @@ define void @v_shuffle_v4i32_v4i32__7_5_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -7621,19 +7621,19 @@ define void @v_shuffle_v4i32_v4i32__7_6_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_6_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_6_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -7671,19 +7671,19 @@ define void @v_shuffle_v4i32_v4i32__7_7_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -7719,18 +7719,18 @@ define void @v_shuffle_v4i32_v4i32__7_7_u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -7774,22 +7774,22 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: v_mov_b32_e32 v7, v2 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: v_mov_b32_e32 v7, v2 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -7833,23 +7833,23 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -7891,23 +7891,23 @@ define void @v_shuffle_v4i32_v4i32__7_7_2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -7951,23 +7951,23 @@ define void @v_shuffle_v4i32_v4i32__7_7_3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[4:5] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[4:5] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -8003,18 +8003,18 @@ define void @v_shuffle_v4i32_v4i32__7_7_5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -8053,21 +8053,21 @@ define void @v_shuffle_v4i32_v4i32__7_7_6_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v0 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v0 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -8102,18 +8102,18 @@ define void @v_shuffle_v4i32_v4i32__u_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__u_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__u_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -8154,22 +8154,22 @@ define void @v_shuffle_v4i32_v4i32__0_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__0_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__0_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -8212,23 +8212,23 @@ define void @v_shuffle_v4i32_v4i32__1_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__1_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__1_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -8271,22 +8271,22 @@ define void @v_shuffle_v4i32_v4i32__2_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__2_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__2_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -8329,22 +8329,22 @@ define void @v_shuffle_v4i32_v4i32__3_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__3_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__3_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -8379,18 +8379,18 @@ define void @v_shuffle_v4i32_v4i32__4_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__4_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__4_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -8427,19 +8427,19 @@ define void @v_shuffle_v4i32_v4i32__5_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__5_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__5_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -8476,19 +8476,19 @@ define void @v_shuffle_v4i32_v4i32__6_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__6_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__6_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -8525,19 +8525,19 @@ define void @v_shuffle_v4i32_v4i32__7_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -8574,19 +8574,19 @@ define void @v_shuffle_v4i32_v4i32__7_u_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -8629,22 +8629,22 @@ define void @v_shuffle_v4i32_v4i32__7_0_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -8685,22 +8685,22 @@ define void @v_shuffle_v4i32_v4i32__7_1_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -8744,23 +8744,23 @@ define void @v_shuffle_v4i32_v4i32__7_2_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -8805,23 +8805,23 @@ define void @v_shuffle_v4i32_v4i32__7_3_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -8859,19 +8859,19 @@ define void @v_shuffle_v4i32_v4i32__7_4_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -8908,19 +8908,19 @@ define void @v_shuffle_v4i32_v4i32__7_6_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_6_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_6_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -8957,19 +8957,19 @@ define void @v_shuffle_v4i32_v4i32__7_7_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -9004,18 +9004,18 @@ define void @v_shuffle_v4i32_v4i32__7_7_u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -9058,22 +9058,22 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -9116,22 +9116,22 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -9174,23 +9174,23 @@ define void @v_shuffle_v4i32_v4i32__7_7_2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -9235,23 +9235,23 @@ define void @v_shuffle_v4i32_v4i32__7_7_3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -9288,19 +9288,19 @@ define void @v_shuffle_v4i32_v4i32__7_7_4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -9339,21 +9339,21 @@ define void @v_shuffle_v4i32_v4i32__7_7_6_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -9388,18 +9388,18 @@ define void @v_shuffle_v4i32_v4i32__u_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__u_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__u_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -9441,23 +9441,23 @@ define void @v_shuffle_v4i32_v4i32__0_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__0_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__0_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -9499,22 +9499,22 @@ define void @v_shuffle_v4i32_v4i32__1_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__1_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[4:5] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__1_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[4:5] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -9557,22 +9557,22 @@ define void @v_shuffle_v4i32_v4i32__2_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__2_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, v6 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v6 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__2_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, v6 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v6 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -9614,22 +9614,22 @@ define void @v_shuffle_v4i32_v4i32__3_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__3_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[6:7] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v7, v6 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__3_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[6:7] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v7, v6 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -9664,18 +9664,18 @@ define void @v_shuffle_v4i32_v4i32__4_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__4_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__4_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -9711,18 +9711,18 @@ define void @v_shuffle_v4i32_v4i32__5_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__5_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__5_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -9759,19 +9759,19 @@ define void @v_shuffle_v4i32_v4i32__6_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__6_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__6_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -9807,18 +9807,18 @@ define void @v_shuffle_v4i32_v4i32__7_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -9853,18 +9853,18 @@ define void @v_shuffle_v4i32_v4i32__7_u_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_u_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_u_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -9906,22 +9906,22 @@ define void @v_shuffle_v4i32_v4i32__7_0_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_0_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_0_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -9964,23 +9964,23 @@ define void @v_shuffle_v4i32_v4i32__7_1_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_1_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_1_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -10022,22 +10022,22 @@ define void @v_shuffle_v4i32_v4i32__7_2_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_2_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[4:5], v[6:7], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v7, v6 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_2_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[6:7], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v7, v6 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -10080,22 +10080,22 @@ define void @v_shuffle_v4i32_v4i32__7_3_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_3_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v7, v6 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_3_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v7, v6 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -10131,18 +10131,18 @@ define void @v_shuffle_v4i32_v4i32__7_4_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_4_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_4_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -10177,18 +10177,18 @@ define void @v_shuffle_v4i32_v4i32__7_5_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_5_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_5_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -10225,19 +10225,19 @@ define void @v_shuffle_v4i32_v4i32__7_7_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -10274,19 +10274,19 @@ define void @v_shuffle_v4i32_v4i32__7_7_u_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -10331,23 +10331,23 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, v0 -; GFX940-NEXT: v_mov_b32_e32 v6, v5 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v9, v4 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, v0 +; GFX942-NEXT: v_mov_b32_e32 v6, v5 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v9, v4 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -10391,23 +10391,23 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[4:5] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[4:5] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -10450,23 +10450,23 @@ define void @v_shuffle_v4i32_v4i32__7_7_2_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v3, v6 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v3, v6 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -10510,23 +10510,23 @@ define void @v_shuffle_v4i32_v4i32__7_7_3_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[6:7] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[6:7] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -10565,21 +10565,21 @@ define void @v_shuffle_v4i32_v4i32__7_7_4_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v2 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v2 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -10616,18 +10616,18 @@ define void @v_shuffle_v4i32_v4i32__7_7_5_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -10662,18 +10662,18 @@ define void @v_shuffle_v4i32_v4i32__u_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__u_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__u_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -10716,23 +10716,23 @@ define void @v_shuffle_v4i32_v4i32__0_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__0_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__0_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -10775,23 +10775,23 @@ define void @v_shuffle_v4i32_v4i32__1_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__1_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__1_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -10834,22 +10834,22 @@ define void @v_shuffle_v4i32_v4i32__2_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__2_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v6, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__2_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v6, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -10892,22 +10892,22 @@ define void @v_shuffle_v4i32_v4i32__3_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__3_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__3_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -10942,18 +10942,18 @@ define void @v_shuffle_v4i32_v4i32__4_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__4_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__4_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -10990,19 +10990,19 @@ define void @v_shuffle_v4i32_v4i32__5_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__5_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__5_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -11039,19 +11039,19 @@ define void @v_shuffle_v4i32_v4i32__6_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__6_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__6_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -11086,18 +11086,18 @@ define void @v_shuffle_v4i32_v4i32__7_u_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_u_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_u_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -11139,22 +11139,22 @@ define void @v_shuffle_v4i32_v4i32__7_0_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_0_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_0_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -11197,23 +11197,23 @@ define void @v_shuffle_v4i32_v4i32__7_1_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_1_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_1_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -11255,22 +11255,22 @@ define void @v_shuffle_v4i32_v4i32__7_2_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_2_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[4:5], v[6:7], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v6, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_2_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[6:7], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v6, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -11313,22 +11313,22 @@ define void @v_shuffle_v4i32_v4i32__7_3_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_3_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_3_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -11365,18 +11365,18 @@ define void @v_shuffle_v4i32_v4i32__7_4_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_4_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_4_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -11411,18 +11411,18 @@ define void @v_shuffle_v4i32_v4i32__7_5_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_5_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_5_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -11458,18 +11458,18 @@ define void @v_shuffle_v4i32_v4i32__7_6_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_6_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_6_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -11504,18 +11504,18 @@ define void @v_shuffle_v4i32_v4i32__7_7_u_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -11558,23 +11558,23 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -11617,23 +11617,23 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -11676,23 +11676,23 @@ define void @v_shuffle_v4i32_v4i32__7_7_2_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -11735,22 +11735,22 @@ define void @v_shuffle_v4i32_v4i32__7_7_3_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -11789,21 +11789,21 @@ define void @v_shuffle_v4i32_v4i32__7_7_4_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -11842,21 +11842,21 @@ define void @v_shuffle_v4i32_v4i32__7_7_5_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v1 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v1 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -11891,18 +11891,18 @@ define void @v_shuffle_v4i32_v4i32__7_7_6_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i32_v4i32__7_7_6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=v"() %vec1 = call <4 x i32> asm "; def $0", "=v"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -11947,17 +11947,17 @@ define void @s_shuffle_v4i32_v4i32__0_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) @@ -11989,17 +11989,17 @@ define void @s_shuffle_v4i32_v4i32__1_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) @@ -12031,17 +12031,17 @@ define void @s_shuffle_v4i32_v4i32__2_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__2_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__2_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) @@ -12073,17 +12073,17 @@ define void @s_shuffle_v4i32_v4i32__3_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__3_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__3_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) @@ -12129,17 +12129,17 @@ define void @s_shuffle_v4i32_v4i32__5_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__5_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__5_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -12172,17 +12172,17 @@ define void @s_shuffle_v4i32_v4i32__6_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__6_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__6_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -12215,17 +12215,17 @@ define void @s_shuffle_v4i32_v4i32__7_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -12266,21 +12266,21 @@ define void @s_shuffle_v4i32_v4i32__7_0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -12319,20 +12319,20 @@ define void @s_shuffle_v4i32_v4i32__7_1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -12373,21 +12373,21 @@ define void @s_shuffle_v4i32_v4i32__7_2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -12428,21 +12428,21 @@ define void @s_shuffle_v4i32_v4i32__7_3_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -12477,18 +12477,18 @@ define void @s_shuffle_v4i32_v4i32__7_4_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -12542,18 +12542,18 @@ define void @s_shuffle_v4i32_v4i32__7_6_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_6_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_6_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -12588,18 +12588,18 @@ define void @s_shuffle_v4i32_v4i32__7_7_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -12642,22 +12642,22 @@ define void @s_shuffle_v4i32_v4i32__7_7_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -12700,22 +12700,22 @@ define void @s_shuffle_v4i32_v4i32__7_7_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -12756,21 +12756,21 @@ define void @s_shuffle_v4i32_v4i32__7_7_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -12813,22 +12813,22 @@ define void @s_shuffle_v4i32_v4i32__7_7_3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -12865,19 +12865,19 @@ define void @s_shuffle_v4i32_v4i32__7_7_4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -12914,19 +12914,19 @@ define void @s_shuffle_v4i32_v4i32__7_7_5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -12983,19 +12983,19 @@ define void @s_shuffle_v4i32_v4i32__7_7_7_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -13040,23 +13040,23 @@ define void @s_shuffle_v4i32_v4i32__7_7_7_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s7 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s7 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -13101,23 +13101,23 @@ define void @s_shuffle_v4i32_v4i32__7_7_7_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s7 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s7 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -13162,23 +13162,23 @@ define void @s_shuffle_v4i32_v4i32__7_7_7_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s7 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s7 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -13221,22 +13221,22 @@ define void @s_shuffle_v4i32_v4i32__7_7_7_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -13275,20 +13275,20 @@ define void @s_shuffle_v4i32_v4i32__7_7_7_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -13327,20 +13327,20 @@ define void @s_shuffle_v4i32_v4i32__7_7_7_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -13379,20 +13379,20 @@ define void @s_shuffle_v4i32_v4i32__7_7_7_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -13450,19 +13450,19 @@ define void @s_shuffle_v4i32_v4i32__u_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) @@ -13520,20 +13520,20 @@ define void @s_shuffle_v4i32_v4i32__1_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) @@ -13571,20 +13571,20 @@ define void @s_shuffle_v4i32_v4i32__2_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) @@ -13622,20 +13622,20 @@ define void @s_shuffle_v4i32_v4i32__3_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) @@ -13671,19 +13671,19 @@ define void @s_shuffle_v4i32_v4i32__4_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__4_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__4_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) @@ -13727,23 +13727,23 @@ define void @s_shuffle_v4i32_v4i32__5_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__5_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__5_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -13788,23 +13788,23 @@ define void @s_shuffle_v4i32_v4i32__6_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__6_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__6_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -13849,23 +13849,23 @@ define void @s_shuffle_v4i32_v4i32__7_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -13908,22 +13908,22 @@ define void @s_shuffle_v4i32_v4i32__7_u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -13968,23 +13968,23 @@ define void @s_shuffle_v4i32_v4i32__7_1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -14029,23 +14029,23 @@ define void @s_shuffle_v4i32_v4i32__7_2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -14090,23 +14090,23 @@ define void @s_shuffle_v4i32_v4i32__7_3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -14151,23 +14151,23 @@ define void @s_shuffle_v4i32_v4i32__7_4_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -14210,22 +14210,22 @@ define void @s_shuffle_v4i32_v4i32__7_5_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -14270,23 +14270,23 @@ define void @s_shuffle_v4i32_v4i32__7_6_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_6_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_6_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -14331,23 +14331,23 @@ define void @s_shuffle_v4i32_v4i32__7_7_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -14390,22 +14390,22 @@ define void @s_shuffle_v4i32_v4i32__7_7_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -14450,23 +14450,23 @@ define void @s_shuffle_v4i32_v4i32__7_7_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -14511,23 +14511,23 @@ define void @s_shuffle_v4i32_v4i32__7_7_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -14572,23 +14572,23 @@ define void @s_shuffle_v4i32_v4i32__7_7_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -14633,23 +14633,23 @@ define void @s_shuffle_v4i32_v4i32__7_7_4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -14694,23 +14694,23 @@ define void @s_shuffle_v4i32_v4i32__7_7_5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -14753,22 +14753,22 @@ define void @s_shuffle_v4i32_v4i32__7_7_6_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -14928,22 +14928,22 @@ define void @s_shuffle_v4i32_v4i32__5_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__5_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__5_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -14986,22 +14986,22 @@ define void @s_shuffle_v4i32_v4i32__6_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__6_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__6_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -15044,22 +15044,22 @@ define void @s_shuffle_v4i32_v4i32__7_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -15102,22 +15102,22 @@ define void @s_shuffle_v4i32_v4i32__7_u_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -15162,23 +15162,23 @@ define void @s_shuffle_v4i32_v4i32__7_0_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -15223,23 +15223,23 @@ define void @s_shuffle_v4i32_v4i32__7_2_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -15284,23 +15284,23 @@ define void @s_shuffle_v4i32_v4i32__7_3_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -15345,23 +15345,23 @@ define void @s_shuffle_v4i32_v4i32__7_4_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -15404,22 +15404,22 @@ define void @s_shuffle_v4i32_v4i32__7_5_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -15464,23 +15464,23 @@ define void @s_shuffle_v4i32_v4i32__7_6_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_6_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_6_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -15525,23 +15525,23 @@ define void @s_shuffle_v4i32_v4i32__7_7_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -15584,22 +15584,22 @@ define void @s_shuffle_v4i32_v4i32__7_7_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -15644,23 +15644,23 @@ define void @s_shuffle_v4i32_v4i32__7_7_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -15705,23 +15705,23 @@ define void @s_shuffle_v4i32_v4i32__7_7_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -15766,23 +15766,23 @@ define void @s_shuffle_v4i32_v4i32__7_7_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -15827,23 +15827,23 @@ define void @s_shuffle_v4i32_v4i32__7_7_4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -15888,23 +15888,23 @@ define void @s_shuffle_v4i32_v4i32__7_7_5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -15947,22 +15947,22 @@ define void @s_shuffle_v4i32_v4i32__7_7_6_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -16122,22 +16122,22 @@ define void @s_shuffle_v4i32_v4i32__5_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__5_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__5_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -16180,22 +16180,22 @@ define void @s_shuffle_v4i32_v4i32__6_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__6_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__6_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -16238,22 +16238,22 @@ define void @s_shuffle_v4i32_v4i32__7_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -16294,21 +16294,21 @@ define void @s_shuffle_v4i32_v4i32__7_u_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -16353,23 +16353,23 @@ define void @s_shuffle_v4i32_v4i32__7_0_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -16410,21 +16410,21 @@ define void @s_shuffle_v4i32_v4i32__7_1_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -16467,22 +16467,22 @@ define void @s_shuffle_v4i32_v4i32__7_3_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -16525,22 +16525,22 @@ define void @s_shuffle_v4i32_v4i32__7_4_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -16583,22 +16583,22 @@ define void @s_shuffle_v4i32_v4i32__7_5_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -16641,22 +16641,22 @@ define void @s_shuffle_v4i32_v4i32__7_6_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_6_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_6_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -16699,22 +16699,22 @@ define void @s_shuffle_v4i32_v4i32__7_7_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -16757,22 +16757,22 @@ define void @s_shuffle_v4i32_v4i32__7_7_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -16817,23 +16817,23 @@ define void @s_shuffle_v4i32_v4i32__7_7_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -16878,23 +16878,23 @@ define void @s_shuffle_v4i32_v4i32__7_7_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -16939,23 +16939,23 @@ define void @s_shuffle_v4i32_v4i32__7_7_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -17000,23 +17000,23 @@ define void @s_shuffle_v4i32_v4i32__7_7_4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -17061,23 +17061,23 @@ define void @s_shuffle_v4i32_v4i32__7_7_5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -17120,22 +17120,22 @@ define void @s_shuffle_v4i32_v4i32__7_7_6_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -17295,22 +17295,22 @@ define void @s_shuffle_v4i32_v4i32__5_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__5_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__5_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -17353,22 +17353,22 @@ define void @s_shuffle_v4i32_v4i32__6_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__6_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__6_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -17411,22 +17411,22 @@ define void @s_shuffle_v4i32_v4i32__7_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -17467,21 +17467,21 @@ define void @s_shuffle_v4i32_v4i32__7_u_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -17526,23 +17526,23 @@ define void @s_shuffle_v4i32_v4i32__7_0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -17583,21 +17583,21 @@ define void @s_shuffle_v4i32_v4i32__7_1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -17640,22 +17640,22 @@ define void @s_shuffle_v4i32_v4i32__7_2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -17698,22 +17698,22 @@ define void @s_shuffle_v4i32_v4i32__7_4_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -17756,22 +17756,22 @@ define void @s_shuffle_v4i32_v4i32__7_5_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -17814,22 +17814,22 @@ define void @s_shuffle_v4i32_v4i32__7_6_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_6_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_6_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -17872,22 +17872,22 @@ define void @s_shuffle_v4i32_v4i32__7_7_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -17928,21 +17928,21 @@ define void @s_shuffle_v4i32_v4i32__7_7_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -17987,23 +17987,23 @@ define void @s_shuffle_v4i32_v4i32__7_7_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -18048,23 +18048,23 @@ define void @s_shuffle_v4i32_v4i32__7_7_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -18105,21 +18105,21 @@ define void @s_shuffle_v4i32_v4i32__7_7_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -18162,22 +18162,22 @@ define void @s_shuffle_v4i32_v4i32__7_7_4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -18220,22 +18220,22 @@ define void @s_shuffle_v4i32_v4i32__7_7_5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -18278,22 +18278,22 @@ define void @s_shuffle_v4i32_v4i32__7_7_6_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -18338,17 +18338,17 @@ define void @s_shuffle_v4i32_v4i32__0_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__0_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__0_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) @@ -18380,17 +18380,17 @@ define void @s_shuffle_v4i32_v4i32__1_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__1_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__1_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) @@ -18422,17 +18422,17 @@ define void @s_shuffle_v4i32_v4i32__2_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__2_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__2_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) @@ -18464,17 +18464,17 @@ define void @s_shuffle_v4i32_v4i32__3_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__3_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__3_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) @@ -18526,20 +18526,20 @@ define void @s_shuffle_v4i32_v4i32__5_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__5_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__5_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -18578,20 +18578,20 @@ define void @s_shuffle_v4i32_v4i32__6_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__6_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__6_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -18630,20 +18630,20 @@ define void @s_shuffle_v4i32_v4i32__7_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -18680,19 +18680,19 @@ define void @s_shuffle_v4i32_v4i32__7_u_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -18737,23 +18737,23 @@ define void @s_shuffle_v4i32_v4i32__7_0_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -18796,22 +18796,22 @@ define void @s_shuffle_v4i32_v4i32__7_1_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -18856,23 +18856,23 @@ define void @s_shuffle_v4i32_v4i32__7_2_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -18917,23 +18917,23 @@ define void @s_shuffle_v4i32_v4i32__7_3_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -18972,20 +18972,20 @@ define void @s_shuffle_v4i32_v4i32__7_5_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -19024,20 +19024,20 @@ define void @s_shuffle_v4i32_v4i32__7_6_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_6_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_6_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -19076,20 +19076,20 @@ define void @s_shuffle_v4i32_v4i32__7_7_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -19126,19 +19126,19 @@ define void @s_shuffle_v4i32_v4i32__7_7_u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -19183,23 +19183,23 @@ define void @s_shuffle_v4i32_v4i32__7_7_0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -19244,23 +19244,23 @@ define void @s_shuffle_v4i32_v4i32__7_7_1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -19303,22 +19303,22 @@ define void @s_shuffle_v4i32_v4i32__7_7_2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -19363,23 +19363,23 @@ define void @s_shuffle_v4i32_v4i32__7_7_3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -19418,20 +19418,20 @@ define void @s_shuffle_v4i32_v4i32__7_7_5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -19470,20 +19470,20 @@ define void @s_shuffle_v4i32_v4i32__7_7_6_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -19546,22 +19546,22 @@ define void @s_shuffle_v4i32_v4i32__0_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__0_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__0_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -19604,22 +19604,22 @@ define void @s_shuffle_v4i32_v4i32__1_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__1_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__1_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -19662,22 +19662,22 @@ define void @s_shuffle_v4i32_v4i32__2_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__2_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__2_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -19720,22 +19720,22 @@ define void @s_shuffle_v4i32_v4i32__3_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__3_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__3_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -19855,19 +19855,19 @@ define void @s_shuffle_v4i32_v4i32__7_u_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -19912,23 +19912,23 @@ define void @s_shuffle_v4i32_v4i32__7_0_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -19971,22 +19971,22 @@ define void @s_shuffle_v4i32_v4i32__7_1_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -20031,23 +20031,23 @@ define void @s_shuffle_v4i32_v4i32__7_2_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -20092,23 +20092,23 @@ define void @s_shuffle_v4i32_v4i32__7_3_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -20147,20 +20147,20 @@ define void @s_shuffle_v4i32_v4i32__7_4_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -20199,20 +20199,20 @@ define void @s_shuffle_v4i32_v4i32__7_6_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_6_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_6_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -20251,20 +20251,20 @@ define void @s_shuffle_v4i32_v4i32__7_7_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -20301,19 +20301,19 @@ define void @s_shuffle_v4i32_v4i32__7_7_u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -20358,23 +20358,23 @@ define void @s_shuffle_v4i32_v4i32__7_7_0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -20419,23 +20419,23 @@ define void @s_shuffle_v4i32_v4i32__7_7_1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -20478,22 +20478,22 @@ define void @s_shuffle_v4i32_v4i32__7_7_2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -20538,23 +20538,23 @@ define void @s_shuffle_v4i32_v4i32__7_7_3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -20593,20 +20593,20 @@ define void @s_shuffle_v4i32_v4i32__7_7_4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -20645,20 +20645,20 @@ define void @s_shuffle_v4i32_v4i32__7_7_6_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -20721,22 +20721,22 @@ define void @s_shuffle_v4i32_v4i32__0_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__0_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__0_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -20779,22 +20779,22 @@ define void @s_shuffle_v4i32_v4i32__1_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__1_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__1_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -20837,22 +20837,22 @@ define void @s_shuffle_v4i32_v4i32__2_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__2_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__2_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -20895,22 +20895,22 @@ define void @s_shuffle_v4i32_v4i32__3_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__3_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__3_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -21056,22 +21056,22 @@ define void @s_shuffle_v4i32_v4i32__7_0_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_0_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_0_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -21114,22 +21114,22 @@ define void @s_shuffle_v4i32_v4i32__7_1_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_1_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_1_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -21172,22 +21172,22 @@ define void @s_shuffle_v4i32_v4i32__7_2_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_2_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_2_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -21230,22 +21230,22 @@ define void @s_shuffle_v4i32_v4i32__7_3_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_3_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_3_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -21284,20 +21284,20 @@ define void @s_shuffle_v4i32_v4i32__7_4_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_4_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_4_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -21375,19 +21375,19 @@ define void @s_shuffle_v4i32_v4i32__7_7_u_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -21432,23 +21432,23 @@ define void @s_shuffle_v4i32_v4i32__7_7_0_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -21493,23 +21493,23 @@ define void @s_shuffle_v4i32_v4i32__7_7_1_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -21552,22 +21552,22 @@ define void @s_shuffle_v4i32_v4i32__7_7_2_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -21612,23 +21612,23 @@ define void @s_shuffle_v4i32_v4i32__7_7_3_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -21667,20 +21667,20 @@ define void @s_shuffle_v4i32_v4i32__7_7_4_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -21719,20 +21719,20 @@ define void @s_shuffle_v4i32_v4i32__7_7_5_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -21795,22 +21795,22 @@ define void @s_shuffle_v4i32_v4i32__0_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__0_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__0_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -21853,22 +21853,22 @@ define void @s_shuffle_v4i32_v4i32__1_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__1_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__1_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -21911,22 +21911,22 @@ define void @s_shuffle_v4i32_v4i32__2_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__2_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__2_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -21969,22 +21969,22 @@ define void @s_shuffle_v4i32_v4i32__3_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__3_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__3_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -22109,22 +22109,22 @@ define void @s_shuffle_v4i32_v4i32__7_0_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_0_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_0_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -22167,22 +22167,22 @@ define void @s_shuffle_v4i32_v4i32__7_1_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_1_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_1_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -22225,22 +22225,22 @@ define void @s_shuffle_v4i32_v4i32__7_2_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_2_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_2_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -22283,22 +22283,22 @@ define void @s_shuffle_v4i32_v4i32__7_3_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_3_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_3_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -22337,20 +22337,20 @@ define void @s_shuffle_v4i32_v4i32__7_4_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_4_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_4_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -22454,22 +22454,22 @@ define void @s_shuffle_v4i32_v4i32__7_7_0_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -22512,22 +22512,22 @@ define void @s_shuffle_v4i32_v4i32__7_7_1_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -22570,22 +22570,22 @@ define void @s_shuffle_v4i32_v4i32__7_7_2_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -22628,22 +22628,22 @@ define void @s_shuffle_v4i32_v4i32__7_7_3_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -22682,20 +22682,20 @@ define void @s_shuffle_v4i32_v4i32__7_7_4_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> @@ -22734,20 +22734,20 @@ define void @s_shuffle_v4i32_v4i32__7_7_5_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i32_v4i32__7_7_5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i32_v4i32__7_7_5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i32> asm "; def $0", "=s"() %vec1 = call <4 x i32> asm "; def $0", "=s"() %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v2i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v2i64.ll index 24c3b2f5b8796..ac7d9557ce765 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v2i64.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v2i64.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v4i64_v2i64__u_u_u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v4i64_v2i64__0_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -81,18 +81,18 @@ define void @v_shuffle_v4i64_v2i64__1_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -137,18 +137,18 @@ define void @v_shuffle_v4i64_v2i64__3_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -193,24 +193,24 @@ define void @v_shuffle_v4i64_v2i64__3_0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -251,22 +251,22 @@ define void @v_shuffle_v4i64_v2i64__3_1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -301,19 +301,19 @@ define void @v_shuffle_v4i64_v2i64__3_2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -348,18 +348,18 @@ define void @v_shuffle_v4i64_v2i64__3_3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -404,23 +404,23 @@ define void @v_shuffle_v4i64_v2i64__3_3_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_3_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -467,24 +467,24 @@ define void @v_shuffle_v4i64_v2i64__3_3_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_3_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -523,20 +523,20 @@ define void @v_shuffle_v4i64_v2i64__3_3_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_3_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -573,19 +573,19 @@ define void @v_shuffle_v4i64_v2i64__3_3_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_3_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -636,26 +636,26 @@ define void @v_shuffle_v4i64_v2i64__3_3_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_3_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, v0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v9, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, v0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v9, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -702,24 +702,24 @@ define void @v_shuffle_v4i64_v2i64__3_3_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_3_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -764,23 +764,23 @@ define void @v_shuffle_v4i64_v2i64__3_3_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_3_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -817,19 +817,19 @@ define void @v_shuffle_v4i64_v2i64__3_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -866,19 +866,19 @@ define void @v_shuffle_v4i64_v2i64__u_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -914,19 +914,19 @@ define void @v_shuffle_v4i64_v2i64__0_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__0_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> zeroinitializer store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -966,22 +966,22 @@ define void @v_shuffle_v4i64_v2i64__1_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1017,19 +1017,19 @@ define void @v_shuffle_v4i64_v2i64__2_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1081,28 +1081,28 @@ define void @v_shuffle_v4i64_v2i64__3_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -1151,26 +1151,26 @@ define void @v_shuffle_v4i64_v2i64__3_u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -1221,26 +1221,26 @@ define void @v_shuffle_v4i64_v2i64__3_1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, v0 -; GFX940-NEXT: v_mov_b32_e32 v9, v1 -; GFX940-NEXT: v_mov_b32_e32 v10, v0 -; GFX940-NEXT: v_mov_b32_e32 v11, v1 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, v0 +; GFX942-NEXT: v_mov_b32_e32 v9, v1 +; GFX942-NEXT: v_mov_b32_e32 v10, v0 +; GFX942-NEXT: v_mov_b32_e32 v11, v1 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -1293,27 +1293,27 @@ define void @v_shuffle_v4i64_v2i64__3_2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -1362,26 +1362,26 @@ define void @v_shuffle_v4i64_v2i64__3_3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -1430,26 +1430,26 @@ define void @v_shuffle_v4i64_v2i64__3_3_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_3_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -1498,26 +1498,26 @@ define void @v_shuffle_v4i64_v2i64__3_3_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_3_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -1568,26 +1568,26 @@ define void @v_shuffle_v4i64_v2i64__3_3_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_3_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, v0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: v_mov_b32_e32 v9, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, v0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_mov_b32_e32 v9, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -1624,19 +1624,19 @@ define void @v_shuffle_v4i64_v2i64__u_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__u_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__u_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1676,22 +1676,22 @@ define void @v_shuffle_v4i64_v2i64__0_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__0_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__0_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1727,19 +1727,19 @@ define void @v_shuffle_v4i64_v2i64__1_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__1_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__1_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1775,19 +1775,19 @@ define void @v_shuffle_v4i64_v2i64__2_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__2_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__2_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1835,25 +1835,25 @@ define void @v_shuffle_v4i64_v2i64__3_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -1902,25 +1902,25 @@ define void @v_shuffle_v4i64_v2i64__3_u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -1975,28 +1975,28 @@ define void @v_shuffle_v4i64_v2i64__3_0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v2 -; GFX940-NEXT: v_mov_b32_e32 v9, v3 -; GFX940-NEXT: v_mov_b32_e32 v10, v2 -; GFX940-NEXT: v_mov_b32_e32 v11, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v2 +; GFX942-NEXT: v_mov_b32_e32 v9, v3 +; GFX942-NEXT: v_mov_b32_e32 v10, v2 +; GFX942-NEXT: v_mov_b32_e32 v11, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -2049,27 +2049,27 @@ define void @v_shuffle_v4i64_v2i64__3_2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -2116,24 +2116,24 @@ define void @v_shuffle_v4i64_v2i64__3_3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -2176,22 +2176,22 @@ define void @v_shuffle_v4i64_v2i64__3_3_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_3_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -2234,22 +2234,22 @@ define void @v_shuffle_v4i64_v2i64__3_3_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_3_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -2296,24 +2296,24 @@ define void @v_shuffle_v4i64_v2i64__3_3_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_3_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -2355,16 +2355,16 @@ define void @v_shuffle_v4i64_v2i64__0_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__0_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2398,18 +2398,18 @@ define void @v_shuffle_v4i64_v2i64__1_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__1_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2460,22 +2460,22 @@ define void @v_shuffle_v4i64_v2i64__3_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -2520,23 +2520,23 @@ define void @v_shuffle_v4i64_v2i64__3_u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -2591,29 +2591,29 @@ define void @v_shuffle_v4i64_v2i64__3_0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: v_mov_b32_e32 v8, v2 -; GFX940-NEXT: v_mov_b32_e32 v9, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_mov_b32_e32 v8, v2 +; GFX942-NEXT: v_mov_b32_e32 v9, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -2664,27 +2664,27 @@ define void @v_shuffle_v4i64_v2i64__3_1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v4 -; GFX940-NEXT: v_mov_b32_e32 v9, v5 -; GFX940-NEXT: v_mov_b32_e32 v10, v4 -; GFX940-NEXT: v_mov_b32_e32 v11, v5 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v4 +; GFX942-NEXT: v_mov_b32_e32 v9, v5 +; GFX942-NEXT: v_mov_b32_e32 v10, v4 +; GFX942-NEXT: v_mov_b32_e32 v11, v5 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -2729,23 +2729,23 @@ define void @v_shuffle_v4i64_v2i64__3_3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -2786,21 +2786,21 @@ define void @v_shuffle_v4i64_v2i64__3_3_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_3_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -2845,23 +2845,23 @@ define void @v_shuffle_v4i64_v2i64__3_3_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_3_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -2912,26 +2912,26 @@ define void @v_shuffle_v4i64_v2i64__3_3_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_3_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -2968,19 +2968,19 @@ define void @v_shuffle_v4i64_v2i64__u_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__u_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__u_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -3023,23 +3023,23 @@ define void @v_shuffle_v4i64_v2i64__0_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__0_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -3088,25 +3088,25 @@ define void @v_shuffle_v4i64_v2i64__1_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -3147,22 +3147,22 @@ define void @v_shuffle_v4i64_v2i64__2_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__2_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -3199,19 +3199,19 @@ define void @v_shuffle_v4i64_v2i64__3_u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -3260,26 +3260,26 @@ define void @v_shuffle_v4i64_v2i64__3_0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -3326,24 +3326,24 @@ define void @v_shuffle_v4i64_v2i64__3_1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -3390,25 +3390,25 @@ define void @v_shuffle_v4i64_v2i64__3_2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -3447,20 +3447,20 @@ define void @v_shuffle_v4i64_v2i64__3_3_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_3_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -3503,23 +3503,23 @@ define void @v_shuffle_v4i64_v2i64__3_3_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_3_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -3568,26 +3568,26 @@ define void @v_shuffle_v4i64_v2i64__3_3_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_3_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -3626,20 +3626,20 @@ define void @v_shuffle_v4i64_v2i64__3_3_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v2i64__3_3_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=v"() %vec1 = call <2 x i64> asm "; def $0", "=v"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -3684,17 +3684,17 @@ define void @s_shuffle_v4i64_v2i64__0_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v2i64__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v2i64__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -3728,18 +3728,18 @@ define void @s_shuffle_v4i64_v2i64__1_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v2i64__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v2i64__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -3787,18 +3787,18 @@ define void @s_shuffle_v4i64_v2i64__3_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v2i64__3_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -3843,23 +3843,23 @@ define void @s_shuffle_v4i64_v2i64__3_0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v2i64__3_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -3900,21 +3900,21 @@ define void @s_shuffle_v4i64_v2i64__3_1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v2i64__3_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -3953,20 +3953,20 @@ define void @s_shuffle_v4i64_v2i64__3_2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v2i64__3_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -4054,23 +4054,23 @@ define void @s_shuffle_v4i64_v2i64__3_3_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v2i64__3_3_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -4163,25 +4163,25 @@ define void @s_shuffle_v4i64_v2i64__3_3_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v2i64__3_3_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -4253,24 +4253,24 @@ define void @s_shuffle_v4i64_v2i64__3_3_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v2i64__3_3_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -4431,25 +4431,25 @@ define void @s_shuffle_v4i64_v2i64__3_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v2i64__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -4494,23 +4494,23 @@ define void @s_shuffle_v4i64_v2i64__3_u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v2i64__3_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -4559,25 +4559,25 @@ define void @s_shuffle_v4i64_v2i64__3_1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v2i64__3_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -4626,25 +4626,25 @@ define void @s_shuffle_v4i64_v2i64__3_2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v2i64__3_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -4714,23 +4714,23 @@ define void @s_shuffle_v4i64_v2i64__3_3_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v2i64__3_3_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -4779,25 +4779,25 @@ define void @s_shuffle_v4i64_v2i64__3_3_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v2i64__3_3_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -4846,25 +4846,25 @@ define void @s_shuffle_v4i64_v2i64__3_3_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v2i64__3_3_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -4999,25 +4999,25 @@ define void @s_shuffle_v4i64_v2i64__3_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v2i64__3_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: s_mov_b32 s14, s10 -; GFX940-NEXT: s_mov_b32 s15, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b32 s14, s10 +; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -5062,23 +5062,23 @@ define void @s_shuffle_v4i64_v2i64__3_u_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v2i64__3_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -5127,25 +5127,25 @@ define void @s_shuffle_v4i64_v2i64__3_0_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v2i64__3_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -5194,25 +5194,25 @@ define void @s_shuffle_v4i64_v2i64__3_2_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v2i64__3_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -5332,25 +5332,25 @@ define void @s_shuffle_v4i64_v2i64__3_3_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v2i64__3_3_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -5395,17 +5395,17 @@ define void @s_shuffle_v4i64_v2i64__0_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v2i64__0_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v2i64__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -5439,18 +5439,18 @@ define void @s_shuffle_v4i64_v2i64__1_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v2i64__1_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v2i64__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -5558,25 +5558,25 @@ define void @s_shuffle_v4i64_v2i64__3_0_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v2i64__3_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -5668,22 +5668,22 @@ define void @s_shuffle_v4i64_v2i64__3_3_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v2i64__3_3_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -5732,25 +5732,25 @@ define void @s_shuffle_v4i64_v2i64__3_3_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v2i64__3_3_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -5803,27 +5803,27 @@ define void @s_shuffle_v4i64_v2i64__3_3_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v2i64__3_3_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -5919,25 +5919,25 @@ define void @s_shuffle_v4i64_v2i64__1_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v2i64__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: s_mov_b32 s14, s10 -; GFX940-NEXT: s_mov_b32 s15, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v2i64__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b32 s14, s10 +; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -6030,25 +6030,25 @@ define void @s_shuffle_v4i64_v2i64__3_0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v2i64__3_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -6193,25 +6193,25 @@ define void @s_shuffle_v4i64_v2i64__3_3_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v2i64__3_3_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s10 -; GFX940-NEXT: s_mov_b32 s15, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s10 +; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v3i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v3i64.ll index 1851a34d0e560..8dd4a40d00680 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v3i64.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v3i64.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v4i64_v3i64__u_u_u_u(ptr addrspace(1) inreg %ptr) { @@ -40,17 +40,17 @@ define void @v_shuffle_v4i64_v3i64__0_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -80,16 +80,16 @@ define void @v_shuffle_v4i64_v3i64__1_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -123,18 +123,18 @@ define void @v_shuffle_v4i64_v3i64__2_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__2_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__2_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -175,16 +175,16 @@ define void @v_shuffle_v4i64_v3i64__4_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__4_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__4_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -219,18 +219,18 @@ define void @v_shuffle_v4i64_v3i64__5_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -275,24 +275,24 @@ define void @v_shuffle_v4i64_v3i64__5_0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -333,22 +333,22 @@ define void @v_shuffle_v4i64_v3i64__5_1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -389,22 +389,22 @@ define void @v_shuffle_v4i64_v3i64__5_2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -443,20 +443,20 @@ define void @v_shuffle_v4i64_v3i64__5_3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -491,18 +491,18 @@ define void @v_shuffle_v4i64_v3i64__5_4_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -537,18 +537,18 @@ define void @v_shuffle_v4i64_v3i64__5_5_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -591,22 +591,22 @@ define void @v_shuffle_v4i64_v3i64__5_5_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -649,22 +649,22 @@ define void @v_shuffle_v4i64_v3i64__5_5_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -711,24 +711,24 @@ define void @v_shuffle_v4i64_v3i64__5_5_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -767,20 +767,20 @@ define void @v_shuffle_v4i64_v3i64__5_5_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -819,20 +819,20 @@ define void @v_shuffle_v4i64_v3i64__5_5_4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -875,22 +875,22 @@ define void @v_shuffle_v4i64_v3i64__5_5_5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -943,28 +943,28 @@ define void @v_shuffle_v4i64_v3i64__5_5_5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -1011,25 +1011,25 @@ define void @v_shuffle_v4i64_v3i64__5_5_5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -1076,24 +1076,24 @@ define void @v_shuffle_v4i64_v3i64__5_5_5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -1138,23 +1138,23 @@ define void @v_shuffle_v4i64_v3i64__5_5_5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v8, v0 -; GFX940-NEXT: v_mov_b32_e32 v9, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v8, v0 +; GFX942-NEXT: v_mov_b32_e32 v9, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -1197,22 +1197,22 @@ define void @v_shuffle_v4i64_v3i64__5_5_5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -1249,19 +1249,19 @@ define void @v_shuffle_v4i64_v3i64__5_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -1298,19 +1298,19 @@ define void @v_shuffle_v4i64_v3i64__u_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1346,19 +1346,19 @@ define void @v_shuffle_v4i64_v3i64__0_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__0_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> zeroinitializer store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1398,21 +1398,21 @@ define void @v_shuffle_v4i64_v3i64__1_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1458,24 +1458,24 @@ define void @v_shuffle_v4i64_v3i64__2_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1511,19 +1511,19 @@ define void @v_shuffle_v4i64_v3i64__3_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1569,25 +1569,25 @@ define void @v_shuffle_v4i64_v3i64__4_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__4_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__4_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -1638,27 +1638,27 @@ define void @v_shuffle_v4i64_v3i64__5_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -1707,26 +1707,26 @@ define void @v_shuffle_v4i64_v3i64__5_u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -1777,27 +1777,27 @@ define void @v_shuffle_v4i64_v3i64__5_1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -1846,25 +1846,25 @@ define void @v_shuffle_v4i64_v3i64__5_2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -1917,27 +1917,27 @@ define void @v_shuffle_v4i64_v3i64__5_3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -1986,26 +1986,26 @@ define void @v_shuffle_v4i64_v3i64__5_4_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -2052,25 +2052,25 @@ define void @v_shuffle_v4i64_v3i64__5_5_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -2117,25 +2117,25 @@ define void @v_shuffle_v4i64_v3i64__5_5_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -2182,25 +2182,25 @@ define void @v_shuffle_v4i64_v3i64__5_5_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -2251,26 +2251,26 @@ define void @v_shuffle_v4i64_v3i64__5_5_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -2319,26 +2319,26 @@ define void @v_shuffle_v4i64_v3i64__5_5_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -2391,28 +2391,28 @@ define void @v_shuffle_v4i64_v3i64__5_5_4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -2449,19 +2449,19 @@ define void @v_shuffle_v4i64_v3i64__u_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__u_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__u_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2497,19 +2497,19 @@ define void @v_shuffle_v4i64_v3i64__0_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__0_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__0_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2545,19 +2545,19 @@ define void @v_shuffle_v4i64_v3i64__1_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__1_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__1_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2599,22 +2599,22 @@ define void @v_shuffle_v4i64_v3i64__2_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__2_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__2_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2650,19 +2650,19 @@ define void @v_shuffle_v4i64_v3i64__3_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__3_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2708,25 +2708,25 @@ define void @v_shuffle_v4i64_v3i64__4_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__4_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v8, v2 -; GFX940-NEXT: v_mov_b32_e32 v9, v3 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__4_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v8, v2 +; GFX942-NEXT: v_mov_b32_e32 v9, v3 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -2773,25 +2773,25 @@ define void @v_shuffle_v4i64_v3i64__5_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -2838,25 +2838,25 @@ define void @v_shuffle_v4i64_v3i64__5_u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -2909,28 +2909,28 @@ define void @v_shuffle_v4i64_v3i64__5_0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -2979,25 +2979,25 @@ define void @v_shuffle_v4i64_v3i64__5_2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -3048,26 +3048,26 @@ define void @v_shuffle_v4i64_v3i64__5_3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -3116,26 +3116,26 @@ define void @v_shuffle_v4i64_v3i64__5_4_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v8 -; GFX940-NEXT: v_mov_b32_e32 v5, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v8 +; GFX942-NEXT: v_mov_b32_e32 v5, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -3182,25 +3182,25 @@ define void @v_shuffle_v4i64_v3i64__5_5_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -3243,22 +3243,22 @@ define void @v_shuffle_v4i64_v3i64__5_5_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -3301,22 +3301,22 @@ define void @v_shuffle_v4i64_v3i64__5_5_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -3363,24 +3363,24 @@ define void @v_shuffle_v4i64_v3i64__5_5_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -3429,26 +3429,26 @@ define void @v_shuffle_v4i64_v3i64__5_5_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -3495,25 +3495,25 @@ define void @v_shuffle_v4i64_v3i64__5_5_4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -3550,19 +3550,19 @@ define void @v_shuffle_v4i64_v3i64__u_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__u_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__u_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3598,19 +3598,19 @@ define void @v_shuffle_v4i64_v3i64__0_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__0_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3650,22 +3650,22 @@ define void @v_shuffle_v4i64_v3i64__1_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__1_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v8, v4 -; GFX940-NEXT: v_mov_b32_e32 v9, v5 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v8, v4 +; GFX942-NEXT: v_mov_b32_e32 v9, v5 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3701,19 +3701,19 @@ define void @v_shuffle_v4i64_v3i64__2_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__2_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__2_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3749,19 +3749,19 @@ define void @v_shuffle_v4i64_v3i64__3_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__3_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__3_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3807,24 +3807,24 @@ define void @v_shuffle_v4i64_v3i64__4_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__4_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v10, v4 -; GFX940-NEXT: v_mov_b32_e32 v11, v5 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__4_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v10, v4 +; GFX942-NEXT: v_mov_b32_e32 v11, v5 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -3873,25 +3873,25 @@ define void @v_shuffle_v4i64_v3i64__5_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -3938,24 +3938,24 @@ define void @v_shuffle_v4i64_v3i64__5_u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -4008,27 +4008,27 @@ define void @v_shuffle_v4i64_v3i64__5_0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -4079,26 +4079,26 @@ define void @v_shuffle_v4i64_v3i64__5_1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v8, v4 -; GFX940-NEXT: v_mov_b32_e32 v9, v5 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v8, v4 +; GFX942-NEXT: v_mov_b32_e32 v9, v5 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -4149,26 +4149,26 @@ define void @v_shuffle_v4i64_v3i64__5_3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -4215,24 +4215,24 @@ define void @v_shuffle_v4i64_v3i64__5_4_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v6, v10 -; GFX940-NEXT: v_mov_b32_e32 v7, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v6, v10 +; GFX942-NEXT: v_mov_b32_e32 v7, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -4279,24 +4279,24 @@ define void @v_shuffle_v4i64_v3i64__5_5_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -4339,22 +4339,22 @@ define void @v_shuffle_v4i64_v3i64__5_5_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -4401,24 +4401,24 @@ define void @v_shuffle_v4i64_v3i64__5_5_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -4461,22 +4461,22 @@ define void @v_shuffle_v4i64_v3i64__5_5_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -4525,26 +4525,26 @@ define void @v_shuffle_v4i64_v3i64__5_5_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v4 -; GFX940-NEXT: v_mov_b32_e32 v9, v5 -; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v4 +; GFX942-NEXT: v_mov_b32_e32 v9, v5 +; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -4591,24 +4591,24 @@ define void @v_shuffle_v4i64_v3i64__5_5_4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -4652,17 +4652,17 @@ define void @v_shuffle_v4i64_v3i64__0_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__0_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -4692,16 +4692,16 @@ define void @v_shuffle_v4i64_v3i64__1_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -4735,18 +4735,18 @@ define void @v_shuffle_v4i64_v3i64__2_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__2_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -4797,21 +4797,21 @@ define void @v_shuffle_v4i64_v3i64__4_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__4_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__4_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -4858,24 +4858,24 @@ define void @v_shuffle_v4i64_v3i64__5_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -4918,22 +4918,22 @@ define void @v_shuffle_v4i64_v3i64__5_u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -4986,28 +4986,28 @@ define void @v_shuffle_v4i64_v3i64__5_0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -5054,25 +5054,25 @@ define void @v_shuffle_v4i64_v3i64__5_1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -5119,24 +5119,24 @@ define void @v_shuffle_v4i64_v3i64__5_2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v6 -; GFX940-NEXT: v_mov_b32_e32 v9, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v6 +; GFX942-NEXT: v_mov_b32_e32 v9, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -5181,23 +5181,23 @@ define void @v_shuffle_v4i64_v3i64__5_4_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: v_mov_b32_e32 v8, v0 -; GFX940-NEXT: v_mov_b32_e32 v9, v1 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: v_mov_b32_e32 v8, v0 +; GFX942-NEXT: v_mov_b32_e32 v9, v1 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -5240,22 +5240,22 @@ define void @v_shuffle_v4i64_v3i64__5_5_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -5298,22 +5298,22 @@ define void @v_shuffle_v4i64_v3i64__5_5_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -5356,22 +5356,22 @@ define void @v_shuffle_v4i64_v3i64__5_5_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -5414,22 +5414,22 @@ define void @v_shuffle_v4i64_v3i64__5_5_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -5480,26 +5480,26 @@ define void @v_shuffle_v4i64_v3i64__5_5_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -5544,23 +5544,23 @@ define void @v_shuffle_v4i64_v3i64__5_5_4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: v_mov_b32_e32 v8, v0 -; GFX940-NEXT: v_mov_b32_e32 v9, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_mov_b32_e32 v8, v0 +; GFX942-NEXT: v_mov_b32_e32 v9, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -5597,19 +5597,19 @@ define void @v_shuffle_v4i64_v3i64__u_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__u_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__u_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -5656,25 +5656,25 @@ define void @v_shuffle_v4i64_v3i64__0_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__0_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__0_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -5721,25 +5721,25 @@ define void @v_shuffle_v4i64_v3i64__1_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__1_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v6 -; GFX940-NEXT: v_mov_b32_e32 v9, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__1_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v6 +; GFX942-NEXT: v_mov_b32_e32 v9, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -5786,24 +5786,24 @@ define void @v_shuffle_v4i64_v3i64__2_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__2_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, v8 -; GFX940-NEXT: v_mov_b32_e32 v11, v9 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__2_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, v8 +; GFX942-NEXT: v_mov_b32_e32 v11, v9 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -5840,19 +5840,19 @@ define void @v_shuffle_v4i64_v3i64__3_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__3_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__3_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -5889,19 +5889,19 @@ define void @v_shuffle_v4i64_v3i64__4_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__4_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__4_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -5944,22 +5944,22 @@ define void @v_shuffle_v4i64_v3i64__5_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -6002,22 +6002,22 @@ define void @v_shuffle_v4i64_v3i64__5_u_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -6070,28 +6070,28 @@ define void @v_shuffle_v4i64_v3i64__5_0_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -6138,25 +6138,25 @@ define void @v_shuffle_v4i64_v3i64__5_1_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -6203,24 +6203,24 @@ define void @v_shuffle_v4i64_v3i64__5_2_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -6269,25 +6269,25 @@ define void @v_shuffle_v4i64_v3i64__5_3_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: v_mov_b32_e32 v8, v2 -; GFX940-NEXT: v_mov_b32_e32 v9, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_mov_b32_e32 v8, v2 +; GFX942-NEXT: v_mov_b32_e32 v9, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -6330,22 +6330,22 @@ define void @v_shuffle_v4i64_v3i64__5_5_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -6384,20 +6384,20 @@ define void @v_shuffle_v4i64_v3i64__5_5_u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -6444,25 +6444,25 @@ define void @v_shuffle_v4i64_v3i64__5_5_0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -6509,25 +6509,25 @@ define void @v_shuffle_v4i64_v3i64__5_5_1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -6576,26 +6576,26 @@ define void @v_shuffle_v4i64_v3i64__5_5_2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -6634,20 +6634,20 @@ define void @v_shuffle_v4i64_v3i64__5_5_3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -6684,19 +6684,19 @@ define void @v_shuffle_v4i64_v3i64__u_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__u_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__u_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -6743,25 +6743,25 @@ define void @v_shuffle_v4i64_v3i64__0_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__0_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__0_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -6808,25 +6808,25 @@ define void @v_shuffle_v4i64_v3i64__1_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__1_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: v_mov_b32_e32 v4, v8 -; GFX940-NEXT: v_mov_b32_e32 v5, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__1_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: v_mov_b32_e32 v4, v8 +; GFX942-NEXT: v_mov_b32_e32 v5, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -6875,25 +6875,25 @@ define void @v_shuffle_v4i64_v3i64__2_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__2_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v8, v4 -; GFX940-NEXT: v_mov_b32_e32 v9, v5 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__2_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v8, v4 +; GFX942-NEXT: v_mov_b32_e32 v9, v5 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -6930,19 +6930,19 @@ define void @v_shuffle_v4i64_v3i64__3_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__3_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__3_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -6983,22 +6983,22 @@ define void @v_shuffle_v4i64_v3i64__4_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__4_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v8, v4 -; GFX940-NEXT: v_mov_b32_e32 v9, v5 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__4_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v8, v4 +; GFX942-NEXT: v_mov_b32_e32 v9, v5 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -7039,21 +7039,21 @@ define void @v_shuffle_v4i64_v3i64__5_u_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -7104,27 +7104,27 @@ define void @v_shuffle_v4i64_v3i64__5_0_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -7171,25 +7171,25 @@ define void @v_shuffle_v4i64_v3i64__5_1_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -7236,24 +7236,24 @@ define void @v_shuffle_v4i64_v3i64__5_2_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -7296,22 +7296,22 @@ define void @v_shuffle_v4i64_v3i64__5_3_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -7356,23 +7356,23 @@ define void @v_shuffle_v4i64_v3i64__5_4_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v8, v4 -; GFX940-NEXT: v_mov_b32_e32 v9, v5 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v8, v4 +; GFX942-NEXT: v_mov_b32_e32 v9, v5 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -7411,20 +7411,20 @@ define void @v_shuffle_v4i64_v3i64__5_5_u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -7471,25 +7471,25 @@ define void @v_shuffle_v4i64_v3i64__5_5_0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -7536,25 +7536,25 @@ define void @v_shuffle_v4i64_v3i64__5_5_1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v8 -; GFX940-NEXT: v_mov_b32_e32 v5, v9 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v8 +; GFX942-NEXT: v_mov_b32_e32 v5, v9 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -7603,26 +7603,26 @@ define void @v_shuffle_v4i64_v3i64__5_5_2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v4 -; GFX940-NEXT: v_mov_b32_e32 v9, v5 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v4 +; GFX942-NEXT: v_mov_b32_e32 v9, v5 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -7659,19 +7659,19 @@ define void @v_shuffle_v4i64_v3i64__5_5_3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -7710,20 +7710,20 @@ define void @v_shuffle_v4i64_v3i64__5_5_4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=v"() %vec1 = call <3 x i64> asm "; def $0", "=v"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -7768,17 +7768,17 @@ define void @s_shuffle_v4i64_v3i64__0_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -7812,18 +7812,18 @@ define void @s_shuffle_v4i64_v3i64__1_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -7853,18 +7853,18 @@ define void @s_shuffle_v4i64_v3i64__2_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__2_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__2_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -7912,18 +7912,18 @@ define void @s_shuffle_v4i64_v3i64__4_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__4_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__4_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -7954,18 +7954,18 @@ define void @s_shuffle_v4i64_v3i64__5_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -8010,21 +8010,21 @@ define void @s_shuffle_v4i64_v3i64__5_0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -8061,21 +8061,21 @@ define void @s_shuffle_v4i64_v3i64__5_1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -8116,23 +8116,23 @@ define void @s_shuffle_v4i64_v3i64__5_2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -8167,20 +8167,20 @@ define void @s_shuffle_v4i64_v3i64__5_3_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -8239,20 +8239,20 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -8301,25 +8301,25 @@ define void @s_shuffle_v4i64_v3i64__5_5_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -8368,25 +8368,25 @@ define void @s_shuffle_v4i64_v3i64__5_5_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -8431,23 +8431,23 @@ define void @s_shuffle_v4i64_v3i64__5_5_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -8486,22 +8486,22 @@ define void @s_shuffle_v4i64_v3i64__5_5_3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -8544,22 +8544,22 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -8630,25 +8630,25 @@ define void @s_shuffle_v4i64_v3i64__5_5_5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -8697,25 +8697,25 @@ define void @s_shuffle_v4i64_v3i64__5_5_5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -8764,25 +8764,25 @@ define void @s_shuffle_v4i64_v3i64__5_5_5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -8829,24 +8829,24 @@ define void @s_shuffle_v4i64_v3i64__5_5_5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -8889,24 +8889,24 @@ define void @s_shuffle_v4i64_v3i64__5_5_5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -8973,22 +8973,22 @@ define void @s_shuffle_v4i64_v3i64__u_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -9057,24 +9057,24 @@ define void @s_shuffle_v4i64_v3i64__1_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -9116,24 +9116,24 @@ define void @s_shuffle_v4i64_v3i64__2_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -9175,22 +9175,22 @@ define void @s_shuffle_v4i64_v3i64__3_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -9242,27 +9242,27 @@ define void @s_shuffle_v4i64_v3i64__4_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__4_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__4_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -9315,25 +9315,25 @@ define void @s_shuffle_v4i64_v3i64__5_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -9382,23 +9382,23 @@ define void @s_shuffle_v4i64_v3i64__5_u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -9451,25 +9451,25 @@ define void @s_shuffle_v4i64_v3i64__5_1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -9518,27 +9518,27 @@ define void @s_shuffle_v4i64_v3i64__5_2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -9591,25 +9591,25 @@ define void @s_shuffle_v4i64_v3i64__5_3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -9658,25 +9658,25 @@ define void @s_shuffle_v4i64_v3i64__5_4_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -9729,27 +9729,27 @@ define void @s_shuffle_v4i64_v3i64__5_5_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -9798,25 +9798,25 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -9869,27 +9869,27 @@ define void @s_shuffle_v4i64_v3i64__5_5_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -9942,27 +9942,27 @@ define void @s_shuffle_v4i64_v3i64__5_5_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -10011,25 +10011,25 @@ define void @s_shuffle_v4i64_v3i64__5_5_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s16 -; GFX940-NEXT: s_mov_b32 s9, s17 -; GFX940-NEXT: s_mov_b32 s10, s16 -; GFX940-NEXT: s_mov_b32 s11, s17 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s16 +; GFX942-NEXT: s_mov_b32 s9, s17 +; GFX942-NEXT: s_mov_b32 s10, s16 +; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -10082,27 +10082,27 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s16 -; GFX940-NEXT: s_mov_b32 s9, s17 -; GFX940-NEXT: s_mov_b32 s10, s16 -; GFX940-NEXT: s_mov_b32 s11, s17 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s16 +; GFX942-NEXT: s_mov_b32 s9, s17 +; GFX942-NEXT: s_mov_b32 s10, s16 +; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -10260,25 +10260,25 @@ define void @s_shuffle_v4i64_v3i64__4_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__4_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: s_mov_b32 s14, s10 -; GFX940-NEXT: s_mov_b32 s15, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__4_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b32 s14, s10 +; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -10323,25 +10323,25 @@ define void @s_shuffle_v4i64_v3i64__5_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: s_mov_b32 s14, s10 -; GFX940-NEXT: s_mov_b32 s15, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b32 s14, s10 +; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -10390,23 +10390,23 @@ define void @s_shuffle_v4i64_v3i64__5_u_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -10459,25 +10459,25 @@ define void @s_shuffle_v4i64_v3i64__5_0_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -10522,27 +10522,27 @@ define void @s_shuffle_v4i64_v3i64__5_2_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -10595,25 +10595,25 @@ define void @s_shuffle_v4i64_v3i64__5_3_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -10662,25 +10662,25 @@ define void @s_shuffle_v4i64_v3i64__5_4_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -10733,27 +10733,27 @@ define void @s_shuffle_v4i64_v3i64__5_5_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -10802,25 +10802,25 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -10873,27 +10873,27 @@ define void @s_shuffle_v4i64_v3i64__5_5_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -10942,27 +10942,27 @@ define void @s_shuffle_v4i64_v3i64__5_5_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -11011,25 +11011,25 @@ define void @s_shuffle_v4i64_v3i64__5_5_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s16 -; GFX940-NEXT: s_mov_b32 s9, s17 -; GFX940-NEXT: s_mov_b32 s10, s16 -; GFX940-NEXT: s_mov_b32 s11, s17 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s16 +; GFX942-NEXT: s_mov_b32 s9, s17 +; GFX942-NEXT: s_mov_b32 s10, s16 +; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -11082,27 +11082,27 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s16 -; GFX940-NEXT: s_mov_b32 s9, s17 -; GFX940-NEXT: s_mov_b32 s10, s16 -; GFX940-NEXT: s_mov_b32 s11, s17 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s16 +; GFX942-NEXT: s_mov_b32 s9, s17 +; GFX942-NEXT: s_mov_b32 s10, s16 +; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -11260,25 +11260,25 @@ define void @s_shuffle_v4i64_v3i64__4_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__4_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__4_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -11323,25 +11323,25 @@ define void @s_shuffle_v4i64_v3i64__5_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -11382,23 +11382,23 @@ define void @s_shuffle_v4i64_v3i64__5_u_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -11447,27 +11447,27 @@ define void @s_shuffle_v4i64_v3i64__5_0_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -11508,23 +11508,23 @@ define void @s_shuffle_v4i64_v3i64__5_1_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -11569,25 +11569,25 @@ define void @s_shuffle_v4i64_v3i64__5_3_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -11636,25 +11636,25 @@ define void @s_shuffle_v4i64_v3i64__5_4_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -11703,25 +11703,25 @@ define void @s_shuffle_v4i64_v3i64__5_5_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -11770,25 +11770,25 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -11837,27 +11837,27 @@ define void @s_shuffle_v4i64_v3i64__5_5_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -11910,27 +11910,27 @@ define void @s_shuffle_v4i64_v3i64__5_5_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -11983,25 +11983,25 @@ define void @s_shuffle_v4i64_v3i64__5_5_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s16 -; GFX940-NEXT: s_mov_b32 s9, s17 -; GFX940-NEXT: s_mov_b32 s10, s16 -; GFX940-NEXT: s_mov_b32 s11, s17 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s16 +; GFX942-NEXT: s_mov_b32 s9, s17 +; GFX942-NEXT: s_mov_b32 s10, s16 +; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -12054,27 +12054,27 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s16 -; GFX940-NEXT: s_mov_b32 s9, s17 -; GFX940-NEXT: s_mov_b32 s10, s16 -; GFX940-NEXT: s_mov_b32 s11, s17 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s16 +; GFX942-NEXT: s_mov_b32 s9, s17 +; GFX942-NEXT: s_mov_b32 s10, s16 +; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -12119,17 +12119,17 @@ define void @s_shuffle_v4i64_v3i64__0_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__0_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -12163,18 +12163,18 @@ define void @s_shuffle_v4i64_v3i64__1_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -12204,18 +12204,18 @@ define void @s_shuffle_v4i64_v3i64__2_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__2_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -12275,24 +12275,24 @@ define void @s_shuffle_v4i64_v3i64__4_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__4_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__4_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -12335,24 +12335,24 @@ define void @s_shuffle_v4i64_v3i64__5_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -12391,22 +12391,22 @@ define void @s_shuffle_v4i64_v3i64__5_u_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -12459,25 +12459,25 @@ define void @s_shuffle_v4i64_v3i64__5_0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -12522,25 +12522,25 @@ define void @s_shuffle_v4i64_v3i64__5_1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -12589,27 +12589,27 @@ define void @s_shuffle_v4i64_v3i64__5_2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:21] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s20 -; GFX940-NEXT: s_mov_b32 s9, s21 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s16 -; GFX940-NEXT: s_mov_b32 s13, s17 -; GFX940-NEXT: s_mov_b32 s14, s16 -; GFX940-NEXT: s_mov_b32 s15, s17 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:21] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s20 +; GFX942-NEXT: s_mov_b32 s9, s21 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s16 +; GFX942-NEXT: s_mov_b32 s13, s17 +; GFX942-NEXT: s_mov_b32 s14, s16 +; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -12652,24 +12652,24 @@ define void @s_shuffle_v4i64_v3i64__5_4_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -12716,24 +12716,24 @@ define void @s_shuffle_v4i64_v3i64__5_5_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -12776,22 +12776,22 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -12844,27 +12844,27 @@ define void @s_shuffle_v4i64_v3i64__5_5_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:21] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s20 -; GFX940-NEXT: s_mov_b32 s9, s21 -; GFX940-NEXT: s_mov_b32 s10, s20 -; GFX940-NEXT: s_mov_b32 s11, s21 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s16 -; GFX940-NEXT: s_mov_b32 s15, s17 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:21] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s20 +; GFX942-NEXT: s_mov_b32 s9, s21 +; GFX942-NEXT: s_mov_b32 s10, s20 +; GFX942-NEXT: s_mov_b32 s11, s21 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s16 +; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -12917,27 +12917,27 @@ define void @s_shuffle_v4i64_v3i64__5_5_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:21] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s20 -; GFX940-NEXT: s_mov_b32 s9, s21 -; GFX940-NEXT: s_mov_b32 s10, s20 -; GFX940-NEXT: s_mov_b32 s11, s21 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s16 -; GFX940-NEXT: s_mov_b32 s15, s17 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:21] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s20 +; GFX942-NEXT: s_mov_b32 s9, s21 +; GFX942-NEXT: s_mov_b32 s10, s20 +; GFX942-NEXT: s_mov_b32 s11, s21 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s16 +; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -12986,25 +12986,25 @@ define void @s_shuffle_v4i64_v3i64__5_5_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -13051,24 +13051,24 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -13135,25 +13135,25 @@ define void @s_shuffle_v4i64_v3i64__0_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__0_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__0_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -13202,25 +13202,25 @@ define void @s_shuffle_v4i64_v3i64__1_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__1_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: s_mov_b32 s14, s10 -; GFX940-NEXT: s_mov_b32 s15, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__1_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b32 s14, s10 +; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -13269,25 +13269,25 @@ define void @s_shuffle_v4i64_v3i64__2_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__2_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: s_mov_b32 s14, s10 -; GFX940-NEXT: s_mov_b32 s15, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__2_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b32 s14, s10 +; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -13396,22 +13396,22 @@ define void @s_shuffle_v4i64_v3i64__5_u_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -13460,25 +13460,25 @@ define void @s_shuffle_v4i64_v3i64__5_0_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: s_mov_b32 s14, s6 -; GFX940-NEXT: s_mov_b32 s15, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b32 s14, s6 +; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -13523,25 +13523,25 @@ define void @s_shuffle_v4i64_v3i64__5_1_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -13590,25 +13590,25 @@ define void @s_shuffle_v4i64_v3i64__5_2_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s16 -; GFX940-NEXT: s_mov_b32 s9, s17 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s16 +; GFX942-NEXT: s_mov_b32 s9, s17 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -13651,24 +13651,24 @@ define void @s_shuffle_v4i64_v3i64__5_3_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -13711,24 +13711,24 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -13767,22 +13767,22 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -13831,25 +13831,25 @@ define void @s_shuffle_v4i64_v3i64__5_5_0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s16 -; GFX940-NEXT: s_mov_b32 s9, s17 -; GFX940-NEXT: s_mov_b32 s10, s16 -; GFX940-NEXT: s_mov_b32 s11, s17 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s16 +; GFX942-NEXT: s_mov_b32 s9, s17 +; GFX942-NEXT: s_mov_b32 s10, s16 +; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -13898,25 +13898,25 @@ define void @s_shuffle_v4i64_v3i64__5_5_1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s16 -; GFX940-NEXT: s_mov_b32 s9, s17 -; GFX940-NEXT: s_mov_b32 s10, s16 -; GFX940-NEXT: s_mov_b32 s11, s17 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s16 +; GFX942-NEXT: s_mov_b32 s9, s17 +; GFX942-NEXT: s_mov_b32 s10, s16 +; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -13965,25 +13965,25 @@ define void @s_shuffle_v4i64_v3i64__5_5_2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -14022,24 +14022,24 @@ define void @s_shuffle_v4i64_v3i64__5_5_3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -14110,25 +14110,25 @@ define void @s_shuffle_v4i64_v3i64__0_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__0_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__0_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -14177,25 +14177,25 @@ define void @s_shuffle_v4i64_v3i64__1_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__1_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__1_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -14244,25 +14244,25 @@ define void @s_shuffle_v4i64_v3i64__2_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__2_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__2_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -14379,25 +14379,25 @@ define void @s_shuffle_v4i64_v3i64__5_0_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -14446,25 +14446,25 @@ define void @s_shuffle_v4i64_v3i64__5_1_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -14513,25 +14513,25 @@ define void @s_shuffle_v4i64_v3i64__5_2_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -14578,24 +14578,24 @@ define void @s_shuffle_v4i64_v3i64__5_3_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -14660,22 +14660,22 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -14728,27 +14728,27 @@ define void @s_shuffle_v4i64_v3i64__5_5_0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s16 -; GFX940-NEXT: s_mov_b32 s9, s17 -; GFX940-NEXT: s_mov_b32 s10, s16 -; GFX940-NEXT: s_mov_b32 s11, s17 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s16 -; GFX940-NEXT: s_mov_b32 s15, s17 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s16 +; GFX942-NEXT: s_mov_b32 s9, s17 +; GFX942-NEXT: s_mov_b32 s10, s16 +; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s16 +; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -14801,27 +14801,27 @@ define void @s_shuffle_v4i64_v3i64__5_5_1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s16 -; GFX940-NEXT: s_mov_b32 s9, s17 -; GFX940-NEXT: s_mov_b32 s10, s16 -; GFX940-NEXT: s_mov_b32 s11, s17 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s16 -; GFX940-NEXT: s_mov_b32 s15, s17 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s16 +; GFX942-NEXT: s_mov_b32 s9, s17 +; GFX942-NEXT: s_mov_b32 s10, s16 +; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s16 +; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -14870,25 +14870,25 @@ define void @s_shuffle_v4i64_v3i64__5_5_2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -14931,24 +14931,24 @@ define void @s_shuffle_v4i64_v3i64__5_5_3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -14995,24 +14995,24 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v4i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v4i64.ll index ab0dbd2f3ba42..ea9ef2f1ac94a 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v4i64.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v4i64.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v4i64_v4i64__u_u_u_u(ptr addrspace(1) inreg %ptr) { @@ -40,17 +40,17 @@ define void @v_shuffle_v4i64_v4i64__0_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -80,16 +80,16 @@ define void @v_shuffle_v4i64_v4i64__1_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -119,16 +119,16 @@ define void @v_shuffle_v4i64_v4i64__2_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__2_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__2_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -162,18 +162,18 @@ define void @v_shuffle_v4i64_v4i64__3_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__3_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__3_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -214,16 +214,16 @@ define void @v_shuffle_v4i64_v4i64__5_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__5_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__5_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -254,16 +254,16 @@ define void @v_shuffle_v4i64_v4i64__6_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__6_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__6_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -298,18 +298,18 @@ define void @v_shuffle_v4i64_v4i64__7_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -354,24 +354,24 @@ define void @v_shuffle_v4i64_v4i64__7_0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -412,22 +412,22 @@ define void @v_shuffle_v4i64_v4i64__7_1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -468,22 +468,22 @@ define void @v_shuffle_v4i64_v4i64__7_2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -524,22 +524,22 @@ define void @v_shuffle_v4i64_v4i64__7_3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -578,20 +578,20 @@ define void @v_shuffle_v4i64_v4i64__7_4_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -626,18 +626,18 @@ define void @v_shuffle_v4i64_v4i64__7_5_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -672,18 +672,18 @@ define void @v_shuffle_v4i64_v4i64__7_6_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_6_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_6_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -718,18 +718,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -772,22 +772,22 @@ define void @v_shuffle_v4i64_v4i64__7_7_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -830,22 +830,22 @@ define void @v_shuffle_v4i64_v4i64__7_7_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -888,22 +888,22 @@ define void @v_shuffle_v4i64_v4i64__7_7_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -950,24 +950,24 @@ define void @v_shuffle_v4i64_v4i64__7_7_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -1004,19 +1004,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -1055,20 +1055,20 @@ define void @v_shuffle_v4i64_v4i64__7_7_5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -1107,20 +1107,20 @@ define void @v_shuffle_v4i64_v4i64__7_7_6_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -1161,21 +1161,21 @@ define void @v_shuffle_v4i64_v4i64__7_7_7_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -1226,27 +1226,27 @@ define void @v_shuffle_v4i64_v4i64__7_7_7_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -1293,25 +1293,25 @@ define void @v_shuffle_v4i64_v4i64__7_7_7_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -1358,25 +1358,25 @@ define void @v_shuffle_v4i64_v4i64__7_7_7_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -1423,24 +1423,24 @@ define void @v_shuffle_v4i64_v4i64__7_7_7_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -1487,24 +1487,24 @@ define void @v_shuffle_v4i64_v4i64__7_7_7_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -1545,21 +1545,21 @@ define void @v_shuffle_v4i64_v4i64__7_7_7_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -1602,22 +1602,22 @@ define void @v_shuffle_v4i64_v4i64__7_7_7_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -1654,19 +1654,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -1703,19 +1703,19 @@ define void @v_shuffle_v4i64_v4i64__u_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1751,19 +1751,19 @@ define void @v_shuffle_v4i64_v4i64__0_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__0_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> zeroinitializer store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1803,21 +1803,21 @@ define void @v_shuffle_v4i64_v4i64__1_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1857,21 +1857,21 @@ define void @v_shuffle_v4i64_v4i64__2_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1915,23 +1915,23 @@ define void @v_shuffle_v4i64_v4i64__3_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1967,19 +1967,19 @@ define void @v_shuffle_v4i64_v4i64__4_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__4_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__4_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2025,25 +2025,25 @@ define void @v_shuffle_v4i64_v4i64__5_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__5_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__5_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -2090,25 +2090,25 @@ define void @v_shuffle_v4i64_v4i64__6_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__6_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v8, v0 -; GFX940-NEXT: v_mov_b32_e32 v9, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__6_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v8, v0 +; GFX942-NEXT: v_mov_b32_e32 v9, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -2159,27 +2159,27 @@ define void @v_shuffle_v4i64_v4i64__7_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -2228,26 +2228,26 @@ define void @v_shuffle_v4i64_v4i64__7_u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -2299,27 +2299,27 @@ define void @v_shuffle_v4i64_v4i64__7_1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -2368,25 +2368,25 @@ define void @v_shuffle_v4i64_v4i64__7_2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -2433,24 +2433,24 @@ define void @v_shuffle_v4i64_v4i64__7_3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -2503,27 +2503,27 @@ define void @v_shuffle_v4i64_v4i64__7_4_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -2572,26 +2572,26 @@ define void @v_shuffle_v4i64_v4i64__7_5_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -2638,25 +2638,25 @@ define void @v_shuffle_v4i64_v4i64__7_6_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_6_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v8 -; GFX940-NEXT: v_mov_b32_e32 v5, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_6_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v8 +; GFX942-NEXT: v_mov_b32_e32 v5, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -2703,25 +2703,25 @@ define void @v_shuffle_v4i64_v4i64__7_7_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -2768,25 +2768,25 @@ define void @v_shuffle_v4i64_v4i64__7_7_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -2833,25 +2833,25 @@ define void @v_shuffle_v4i64_v4i64__7_7_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -2898,25 +2898,25 @@ define void @v_shuffle_v4i64_v4i64__7_7_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -2967,26 +2967,26 @@ define void @v_shuffle_v4i64_v4i64__7_7_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -3033,25 +3033,25 @@ define void @v_shuffle_v4i64_v4i64__7_7_4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -3100,26 +3100,26 @@ define void @v_shuffle_v4i64_v4i64__7_7_5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -3170,27 +3170,27 @@ define void @v_shuffle_v4i64_v4i64__7_7_6_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -3227,19 +3227,19 @@ define void @v_shuffle_v4i64_v4i64__u_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__u_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__u_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3275,19 +3275,19 @@ define void @v_shuffle_v4i64_v4i64__0_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__0_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__0_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3323,19 +3323,19 @@ define void @v_shuffle_v4i64_v4i64__1_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__1_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__1_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3375,21 +3375,21 @@ define void @v_shuffle_v4i64_v4i64__2_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__2_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__2_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3429,21 +3429,21 @@ define void @v_shuffle_v4i64_v4i64__3_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__3_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3479,19 +3479,19 @@ define void @v_shuffle_v4i64_v4i64__4_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__4_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__4_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3537,25 +3537,25 @@ define void @v_shuffle_v4i64_v4i64__5_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__5_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v8, v2 -; GFX940-NEXT: v_mov_b32_e32 v9, v3 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__5_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v8, v2 +; GFX942-NEXT: v_mov_b32_e32 v9, v3 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -3602,25 +3602,25 @@ define void @v_shuffle_v4i64_v4i64__6_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__6_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v10, v2 -; GFX940-NEXT: v_mov_b32_e32 v11, v3 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__6_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v10, v2 +; GFX942-NEXT: v_mov_b32_e32 v11, v3 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -3667,25 +3667,25 @@ define void @v_shuffle_v4i64_v4i64__7_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -3732,25 +3732,25 @@ define void @v_shuffle_v4i64_v4i64__7_u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -3802,28 +3802,28 @@ define void @v_shuffle_v4i64_v4i64__7_0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -3872,25 +3872,25 @@ define void @v_shuffle_v4i64_v4i64__7_2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -3939,25 +3939,25 @@ define void @v_shuffle_v4i64_v4i64__7_3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -4008,26 +4008,26 @@ define void @v_shuffle_v4i64_v4i64__7_4_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v12 -; GFX940-NEXT: v_mov_b32_e32 v1, v13 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v12 +; GFX942-NEXT: v_mov_b32_e32 v1, v13 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -4076,26 +4076,26 @@ define void @v_shuffle_v4i64_v4i64__7_5_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v10 -; GFX940-NEXT: v_mov_b32_e32 v5, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v10 +; GFX942-NEXT: v_mov_b32_e32 v5, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -4142,25 +4142,25 @@ define void @v_shuffle_v4i64_v4i64__7_6_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_6_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v10 -; GFX940-NEXT: v_mov_b32_e32 v7, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_6_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v10 +; GFX942-NEXT: v_mov_b32_e32 v7, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -4207,25 +4207,25 @@ define void @v_shuffle_v4i64_v4i64__7_7_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -4268,22 +4268,22 @@ define void @v_shuffle_v4i64_v4i64__7_7_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -4326,22 +4326,22 @@ define void @v_shuffle_v4i64_v4i64__7_7_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -4388,25 +4388,25 @@ define void @v_shuffle_v4i64_v4i64__7_7_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -4453,24 +4453,24 @@ define void @v_shuffle_v4i64_v4i64__7_7_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -4517,25 +4517,25 @@ define void @v_shuffle_v4i64_v4i64__7_7_4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -4584,26 +4584,26 @@ define void @v_shuffle_v4i64_v4i64__7_7_5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v2 -; GFX940-NEXT: v_mov_b32_e32 v9, v3 -; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v2 +; GFX942-NEXT: v_mov_b32_e32 v9, v3 +; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -4650,25 +4650,25 @@ define void @v_shuffle_v4i64_v4i64__7_7_6_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -4705,19 +4705,19 @@ define void @v_shuffle_v4i64_v4i64__u_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__u_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__u_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -4757,21 +4757,21 @@ define void @v_shuffle_v4i64_v4i64__0_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__0_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -4807,19 +4807,19 @@ define void @v_shuffle_v4i64_v4i64__1_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__1_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -4855,19 +4855,19 @@ define void @v_shuffle_v4i64_v4i64__2_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__2_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__2_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -4909,22 +4909,22 @@ define void @v_shuffle_v4i64_v4i64__3_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__3_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__3_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -4960,19 +4960,19 @@ define void @v_shuffle_v4i64_v4i64__4_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__4_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__4_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -5018,25 +5018,25 @@ define void @v_shuffle_v4i64_v4i64__5_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__5_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v10, v4 -; GFX940-NEXT: v_mov_b32_e32 v11, v5 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__5_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v10, v4 +; GFX942-NEXT: v_mov_b32_e32 v11, v5 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -5083,25 +5083,25 @@ define void @v_shuffle_v4i64_v4i64__6_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__6_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v12, v4 -; GFX940-NEXT: v_mov_b32_e32 v13, v5 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__6_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v12, v4 +; GFX942-NEXT: v_mov_b32_e32 v13, v5 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -5148,25 +5148,25 @@ define void @v_shuffle_v4i64_v4i64__7_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -5213,25 +5213,25 @@ define void @v_shuffle_v4i64_v4i64__7_u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v0, v12 -; GFX940-NEXT: v_mov_b32_e32 v1, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v0, v12 +; GFX942-NEXT: v_mov_b32_e32 v1, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -5282,27 +5282,27 @@ define void @v_shuffle_v4i64_v4i64__7_0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -5349,25 +5349,25 @@ define void @v_shuffle_v4i64_v4i64__7_1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v0, v12 -; GFX940-NEXT: v_mov_b32_e32 v1, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v0, v12 +; GFX942-NEXT: v_mov_b32_e32 v1, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -5416,25 +5416,25 @@ define void @v_shuffle_v4i64_v4i64__7_3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -5485,26 +5485,26 @@ define void @v_shuffle_v4i64_v4i64__7_4_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v0, v14 -; GFX940-NEXT: v_mov_b32_e32 v1, v15 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v0, v14 +; GFX942-NEXT: v_mov_b32_e32 v1, v15 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -5553,26 +5553,26 @@ define void @v_shuffle_v4i64_v4i64__7_5_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v6, v12 -; GFX940-NEXT: v_mov_b32_e32 v7, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v6, v12 +; GFX942-NEXT: v_mov_b32_e32 v7, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -5619,25 +5619,25 @@ define void @v_shuffle_v4i64_v4i64__7_6_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_6_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v8, v12 -; GFX940-NEXT: v_mov_b32_e32 v9, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_6_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v8, v12 +; GFX942-NEXT: v_mov_b32_e32 v9, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -5684,25 +5684,25 @@ define void @v_shuffle_v4i64_v4i64__7_7_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -5745,22 +5745,22 @@ define void @v_shuffle_v4i64_v4i64__7_7_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -5807,24 +5807,24 @@ define void @v_shuffle_v4i64_v4i64__7_7_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -5867,22 +5867,22 @@ define void @v_shuffle_v4i64_v4i64__7_7_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -5929,24 +5929,24 @@ define void @v_shuffle_v4i64_v4i64__7_7_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -5993,25 +5993,25 @@ define void @v_shuffle_v4i64_v4i64__7_7_4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v4 -; GFX940-NEXT: v_mov_b32_e32 v9, v5 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v4 +; GFX942-NEXT: v_mov_b32_e32 v9, v5 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -6060,26 +6060,26 @@ define void @v_shuffle_v4i64_v4i64__7_7_5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v4 -; GFX940-NEXT: v_mov_b32_e32 v11, v5 -; GFX940-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v4 +; GFX942-NEXT: v_mov_b32_e32 v11, v5 +; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -6126,25 +6126,25 @@ define void @v_shuffle_v4i64_v4i64__7_7_6_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -6181,19 +6181,19 @@ define void @v_shuffle_v4i64_v4i64__u_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__u_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__u_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -6233,21 +6233,21 @@ define void @v_shuffle_v4i64_v4i64__0_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__0_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -6283,19 +6283,19 @@ define void @v_shuffle_v4i64_v4i64__1_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -6335,21 +6335,21 @@ define void @v_shuffle_v4i64_v4i64__2_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__2_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -6385,19 +6385,19 @@ define void @v_shuffle_v4i64_v4i64__3_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__3_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__3_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -6433,19 +6433,19 @@ define void @v_shuffle_v4i64_v4i64__4_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__4_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__4_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -6491,24 +6491,24 @@ define void @v_shuffle_v4i64_v4i64__5_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__5_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v12, v6 -; GFX940-NEXT: v_mov_b32_e32 v13, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__5_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v12, v6 +; GFX942-NEXT: v_mov_b32_e32 v13, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -6555,24 +6555,24 @@ define void @v_shuffle_v4i64_v4i64__6_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__6_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v14, v6 -; GFX940-NEXT: v_mov_b32_e32 v15, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__6_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v14, v6 +; GFX942-NEXT: v_mov_b32_e32 v15, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -6621,25 +6621,25 @@ define void @v_shuffle_v4i64_v4i64__7_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -6686,24 +6686,24 @@ define void @v_shuffle_v4i64_v4i64__7_u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v0, v14 -; GFX940-NEXT: v_mov_b32_e32 v1, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v0, v14 +; GFX942-NEXT: v_mov_b32_e32 v1, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -6754,26 +6754,26 @@ define void @v_shuffle_v4i64_v4i64__7_0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v14 -; GFX940-NEXT: v_mov_b32_e32 v3, v15 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v14 +; GFX942-NEXT: v_mov_b32_e32 v3, v15 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -6820,24 +6820,24 @@ define void @v_shuffle_v4i64_v4i64__7_1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v0, v14 -; GFX940-NEXT: v_mov_b32_e32 v1, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v0, v14 +; GFX942-NEXT: v_mov_b32_e32 v1, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -6890,27 +6890,27 @@ define void @v_shuffle_v4i64_v4i64__7_2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v14 -; GFX940-NEXT: v_mov_b32_e32 v3, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v14 +; GFX942-NEXT: v_mov_b32_e32 v3, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -6961,26 +6961,26 @@ define void @v_shuffle_v4i64_v4i64__7_4_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v0, v14 -; GFX940-NEXT: v_mov_b32_e32 v1, v15 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v0, v14 +; GFX942-NEXT: v_mov_b32_e32 v1, v15 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -7027,24 +7027,24 @@ define void @v_shuffle_v4i64_v4i64__7_5_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v8, v14 -; GFX940-NEXT: v_mov_b32_e32 v9, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v8, v14 +; GFX942-NEXT: v_mov_b32_e32 v9, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -7091,24 +7091,24 @@ define void @v_shuffle_v4i64_v4i64__7_6_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_6_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v10, v14 -; GFX940-NEXT: v_mov_b32_e32 v11, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_6_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v10, v14 +; GFX942-NEXT: v_mov_b32_e32 v11, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -7155,24 +7155,24 @@ define void @v_shuffle_v4i64_v4i64__7_7_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -7215,22 +7215,22 @@ define void @v_shuffle_v4i64_v4i64__7_7_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -7277,24 +7277,24 @@ define void @v_shuffle_v4i64_v4i64__7_7_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -7341,24 +7341,24 @@ define void @v_shuffle_v4i64_v4i64__7_7_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -7401,22 +7401,22 @@ define void @v_shuffle_v4i64_v4i64__7_7_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -7463,24 +7463,24 @@ define void @v_shuffle_v4i64_v4i64__7_7_4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v10, v6 -; GFX940-NEXT: v_mov_b32_e32 v11, v7 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v10, v6 +; GFX942-NEXT: v_mov_b32_e32 v11, v7 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -7529,26 +7529,26 @@ define void @v_shuffle_v4i64_v4i64__7_7_5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v6 -; GFX940-NEXT: v_mov_b32_e32 v13, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v6 +; GFX942-NEXT: v_mov_b32_e32 v13, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -7595,24 +7595,24 @@ define void @v_shuffle_v4i64_v4i64__7_7_6_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v12 -; GFX940-NEXT: v_mov_b32_e32 v5, v13 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v12 +; GFX942-NEXT: v_mov_b32_e32 v5, v13 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -7656,17 +7656,17 @@ define void @v_shuffle_v4i64_v4i64__0_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__0_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__0_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -7696,16 +7696,16 @@ define void @v_shuffle_v4i64_v4i64__1_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__1_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__1_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -7735,16 +7735,16 @@ define void @v_shuffle_v4i64_v4i64__2_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__2_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__2_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -7778,18 +7778,18 @@ define void @v_shuffle_v4i64_v4i64__3_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__3_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__3_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 @@ -7840,21 +7840,21 @@ define void @v_shuffle_v4i64_v4i64__5_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__5_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__5_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -7895,21 +7895,21 @@ define void @v_shuffle_v4i64_v4i64__6_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__6_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__6_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -7954,23 +7954,23 @@ define void @v_shuffle_v4i64_v4i64__7_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -8013,22 +8013,22 @@ define void @v_shuffle_v4i64_v4i64__7_u_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -8080,28 +8080,28 @@ define void @v_shuffle_v4i64_v4i64__7_0_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -8148,25 +8148,25 @@ define void @v_shuffle_v4i64_v4i64__7_1_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -8213,25 +8213,25 @@ define void @v_shuffle_v4i64_v4i64__7_2_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v6 -; GFX940-NEXT: v_mov_b32_e32 v9, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v6 +; GFX942-NEXT: v_mov_b32_e32 v9, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -8278,24 +8278,24 @@ define void @v_shuffle_v4i64_v4i64__7_3_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v8 -; GFX940-NEXT: v_mov_b32_e32 v11, v9 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v8 +; GFX942-NEXT: v_mov_b32_e32 v11, v9 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -8340,23 +8340,23 @@ define void @v_shuffle_v4i64_v4i64__7_5_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v0 -; GFX940-NEXT: v_mov_b32_e32 v9, v1 -; GFX940-NEXT: v_mov_b32_e32 v10, v0 -; GFX940-NEXT: v_mov_b32_e32 v11, v1 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v0 +; GFX942-NEXT: v_mov_b32_e32 v9, v1 +; GFX942-NEXT: v_mov_b32_e32 v10, v0 +; GFX942-NEXT: v_mov_b32_e32 v11, v1 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -8399,22 +8399,22 @@ define void @v_shuffle_v4i64_v4i64__7_6_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_6_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_6_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -8455,21 +8455,21 @@ define void @v_shuffle_v4i64_v4i64__7_7_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -8510,21 +8510,21 @@ define void @v_shuffle_v4i64_v4i64__7_7_u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -8567,22 +8567,22 @@ define void @v_shuffle_v4i64_v4i64__7_7_0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -8625,22 +8625,22 @@ define void @v_shuffle_v4i64_v4i64__7_7_1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -8683,22 +8683,22 @@ define void @v_shuffle_v4i64_v4i64__7_7_2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -8749,26 +8749,26 @@ define void @v_shuffle_v4i64_v4i64__7_7_3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -8811,22 +8811,22 @@ define void @v_shuffle_v4i64_v4i64__7_7_5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -8873,24 +8873,24 @@ define void @v_shuffle_v4i64_v4i64__7_7_6_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -8927,19 +8927,19 @@ define void @v_shuffle_v4i64_v4i64__u_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__u_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__u_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -8986,25 +8986,25 @@ define void @v_shuffle_v4i64_v4i64__0_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__0_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__0_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -9051,25 +9051,25 @@ define void @v_shuffle_v4i64_v4i64__1_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__1_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v6 -; GFX940-NEXT: v_mov_b32_e32 v9, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__1_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v6 +; GFX942-NEXT: v_mov_b32_e32 v9, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -9116,25 +9116,25 @@ define void @v_shuffle_v4i64_v4i64__2_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__2_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v8 -; GFX940-NEXT: v_mov_b32_e32 v11, v9 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__2_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v8 +; GFX942-NEXT: v_mov_b32_e32 v11, v9 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -9181,24 +9181,24 @@ define void @v_shuffle_v4i64_v4i64__3_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__3_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, v10 -; GFX940-NEXT: v_mov_b32_e32 v13, v11 -; GFX940-NEXT: v_mov_b32_e32 v8, v6 -; GFX940-NEXT: v_mov_b32_e32 v9, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__3_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, v10 +; GFX942-NEXT: v_mov_b32_e32 v13, v11 +; GFX942-NEXT: v_mov_b32_e32 v8, v6 +; GFX942-NEXT: v_mov_b32_e32 v9, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -9235,19 +9235,19 @@ define void @v_shuffle_v4i64_v4i64__4_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__4_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__4_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -9284,19 +9284,19 @@ define void @v_shuffle_v4i64_v4i64__5_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__5_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__5_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -9337,21 +9337,21 @@ define void @v_shuffle_v4i64_v4i64__6_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__6_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__6_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -9392,21 +9392,21 @@ define void @v_shuffle_v4i64_v4i64__7_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -9447,21 +9447,21 @@ define void @v_shuffle_v4i64_v4i64__7_u_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -9512,27 +9512,27 @@ define void @v_shuffle_v4i64_v4i64__7_0_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -9579,25 +9579,25 @@ define void @v_shuffle_v4i64_v4i64__7_1_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v6 -; GFX940-NEXT: v_mov_b32_e32 v9, v7 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v6 +; GFX942-NEXT: v_mov_b32_e32 v9, v7 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -9644,25 +9644,25 @@ define void @v_shuffle_v4i64_v4i64__7_2_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v8 -; GFX940-NEXT: v_mov_b32_e32 v11, v9 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v8 +; GFX942-NEXT: v_mov_b32_e32 v11, v9 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -9709,24 +9709,24 @@ define void @v_shuffle_v4i64_v4i64__7_3_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v10 -; GFX940-NEXT: v_mov_b32_e32 v13, v11 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v10 +; GFX942-NEXT: v_mov_b32_e32 v13, v11 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -9773,24 +9773,24 @@ define void @v_shuffle_v4i64_v4i64__7_4_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -9833,22 +9833,22 @@ define void @v_shuffle_v4i64_v4i64__7_6_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_6_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_6_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -9891,22 +9891,22 @@ define void @v_shuffle_v4i64_v4i64__7_7_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -9943,19 +9943,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -10002,25 +10002,25 @@ define void @v_shuffle_v4i64_v4i64__7_7_0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -10067,25 +10067,25 @@ define void @v_shuffle_v4i64_v4i64__7_7_1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -10132,25 +10132,25 @@ define void @v_shuffle_v4i64_v4i64__7_7_2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -10197,24 +10197,24 @@ define void @v_shuffle_v4i64_v4i64__7_7_3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v8, v6 -; GFX940-NEXT: v_mov_b32_e32 v9, v7 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v8, v6 +; GFX942-NEXT: v_mov_b32_e32 v9, v7 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -10251,19 +10251,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -10304,21 +10304,21 @@ define void @v_shuffle_v4i64_v4i64__7_7_6_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -10355,19 +10355,19 @@ define void @v_shuffle_v4i64_v4i64__u_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__u_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__u_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -10414,25 +10414,25 @@ define void @v_shuffle_v4i64_v4i64__0_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__0_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v6 -; GFX940-NEXT: v_mov_b32_e32 v9, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__0_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v6 +; GFX942-NEXT: v_mov_b32_e32 v9, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -10479,25 +10479,25 @@ define void @v_shuffle_v4i64_v4i64__1_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__1_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v8 -; GFX940-NEXT: v_mov_b32_e32 v11, v9 -; GFX940-NEXT: v_mov_b32_e32 v4, v8 -; GFX940-NEXT: v_mov_b32_e32 v5, v9 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__1_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v8 +; GFX942-NEXT: v_mov_b32_e32 v11, v9 +; GFX942-NEXT: v_mov_b32_e32 v4, v8 +; GFX942-NEXT: v_mov_b32_e32 v5, v9 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -10544,25 +10544,25 @@ define void @v_shuffle_v4i64_v4i64__2_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__2_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v10 -; GFX940-NEXT: v_mov_b32_e32 v13, v11 -; GFX940-NEXT: v_mov_b32_e32 v6, v10 -; GFX940-NEXT: v_mov_b32_e32 v7, v11 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__2_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v10 +; GFX942-NEXT: v_mov_b32_e32 v13, v11 +; GFX942-NEXT: v_mov_b32_e32 v6, v10 +; GFX942-NEXT: v_mov_b32_e32 v7, v11 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -10609,24 +10609,24 @@ define void @v_shuffle_v4i64_v4i64__3_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__3_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, v12 -; GFX940-NEXT: v_mov_b32_e32 v15, v13 -; GFX940-NEXT: v_mov_b32_e32 v10, v6 -; GFX940-NEXT: v_mov_b32_e32 v11, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__3_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, v12 +; GFX942-NEXT: v_mov_b32_e32 v15, v13 +; GFX942-NEXT: v_mov_b32_e32 v10, v6 +; GFX942-NEXT: v_mov_b32_e32 v11, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -10667,21 +10667,21 @@ define void @v_shuffle_v4i64_v4i64__4_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__4_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__4_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -10718,19 +10718,19 @@ define void @v_shuffle_v4i64_v4i64__5_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__5_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__5_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -10767,19 +10767,19 @@ define void @v_shuffle_v4i64_v4i64__6_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__6_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__6_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -10822,22 +10822,22 @@ define void @v_shuffle_v4i64_v4i64__7_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -10878,21 +10878,21 @@ define void @v_shuffle_v4i64_v4i64__7_u_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_u_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_u_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -10943,27 +10943,27 @@ define void @v_shuffle_v4i64_v4i64__7_0_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_0_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_0_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -11010,25 +11010,25 @@ define void @v_shuffle_v4i64_v4i64__7_1_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_1_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_1_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -11075,25 +11075,25 @@ define void @v_shuffle_v4i64_v4i64__7_2_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_2_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_2_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -11140,24 +11140,24 @@ define void @v_shuffle_v4i64_v4i64__7_3_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_3_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_3_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -11204,24 +11204,24 @@ define void @v_shuffle_v4i64_v4i64__7_4_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_4_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_4_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -11266,23 +11266,23 @@ define void @v_shuffle_v4i64_v4i64__7_5_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_5_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v4 -; GFX940-NEXT: v_mov_b32_e32 v9, v5 -; GFX940-NEXT: v_mov_b32_e32 v10, v4 -; GFX940-NEXT: v_mov_b32_e32 v11, v5 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_5_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v4 +; GFX942-NEXT: v_mov_b32_e32 v9, v5 +; GFX942-NEXT: v_mov_b32_e32 v10, v4 +; GFX942-NEXT: v_mov_b32_e32 v11, v5 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -11325,22 +11325,22 @@ define void @v_shuffle_v4i64_v4i64__7_7_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -11379,20 +11379,20 @@ define void @v_shuffle_v4i64_v4i64__7_7_u_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -11439,25 +11439,25 @@ define void @v_shuffle_v4i64_v4i64__7_7_0_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -11504,25 +11504,25 @@ define void @v_shuffle_v4i64_v4i64__7_7_1_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v8 -; GFX940-NEXT: v_mov_b32_e32 v5, v9 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v8 +; GFX942-NEXT: v_mov_b32_e32 v5, v9 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -11569,25 +11569,25 @@ define void @v_shuffle_v4i64_v4i64__7_7_2_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v10 -; GFX940-NEXT: v_mov_b32_e32 v7, v11 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v10 +; GFX942-NEXT: v_mov_b32_e32 v7, v11 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -11636,26 +11636,26 @@ define void @v_shuffle_v4i64_v4i64__7_7_3_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v6 -; GFX940-NEXT: v_mov_b32_e32 v11, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v6 +; GFX942-NEXT: v_mov_b32_e32 v11, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -11696,21 +11696,21 @@ define void @v_shuffle_v4i64_v4i64__7_7_4_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -11749,20 +11749,20 @@ define void @v_shuffle_v4i64_v4i64__7_7_5_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -11799,19 +11799,19 @@ define void @v_shuffle_v4i64_v4i64__u_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__u_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__u_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -11858,25 +11858,25 @@ define void @v_shuffle_v4i64_v4i64__0_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__0_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__0_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -11923,25 +11923,25 @@ define void @v_shuffle_v4i64_v4i64__1_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__1_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: v_mov_b32_e32 v4, v10 -; GFX940-NEXT: v_mov_b32_e32 v5, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__1_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: v_mov_b32_e32 v4, v10 +; GFX942-NEXT: v_mov_b32_e32 v5, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -11988,25 +11988,25 @@ define void @v_shuffle_v4i64_v4i64__2_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__2_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: v_mov_b32_e32 v6, v12 -; GFX940-NEXT: v_mov_b32_e32 v7, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__2_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: v_mov_b32_e32 v6, v12 +; GFX942-NEXT: v_mov_b32_e32 v7, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -12055,25 +12055,25 @@ define void @v_shuffle_v4i64_v4i64__3_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__3_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v12, v6 -; GFX940-NEXT: v_mov_b32_e32 v13, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__3_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v12, v6 +; GFX942-NEXT: v_mov_b32_e32 v13, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -12114,21 +12114,21 @@ define void @v_shuffle_v4i64_v4i64__4_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__4_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__4_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -12165,19 +12165,19 @@ define void @v_shuffle_v4i64_v4i64__5_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__5_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__5_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -12218,21 +12218,21 @@ define void @v_shuffle_v4i64_v4i64__6_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__6_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__6_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -12273,21 +12273,21 @@ define void @v_shuffle_v4i64_v4i64__7_u_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_u_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_u_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -12338,27 +12338,27 @@ define void @v_shuffle_v4i64_v4i64__7_0_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_0_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_0_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -12405,25 +12405,25 @@ define void @v_shuffle_v4i64_v4i64__7_1_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_1_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_1_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -12470,25 +12470,25 @@ define void @v_shuffle_v4i64_v4i64__7_2_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_2_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_2_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -12535,24 +12535,24 @@ define void @v_shuffle_v4i64_v4i64__7_3_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_3_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_3_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -12597,23 +12597,23 @@ define void @v_shuffle_v4i64_v4i64__7_4_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_4_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_4_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -12654,21 +12654,21 @@ define void @v_shuffle_v4i64_v4i64__7_5_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_5_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_5_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -12709,21 +12709,21 @@ define void @v_shuffle_v4i64_v4i64__7_6_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_6_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_6_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -12762,20 +12762,20 @@ define void @v_shuffle_v4i64_v4i64__7_7_u_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -12822,25 +12822,25 @@ define void @v_shuffle_v4i64_v4i64__7_7_0_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -12887,25 +12887,25 @@ define void @v_shuffle_v4i64_v4i64__7_7_1_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v10 -; GFX940-NEXT: v_mov_b32_e32 v5, v11 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v10 +; GFX942-NEXT: v_mov_b32_e32 v5, v11 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -12952,25 +12952,25 @@ define void @v_shuffle_v4i64_v4i64__7_7_2_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v12 -; GFX940-NEXT: v_mov_b32_e32 v7, v13 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v12 +; GFX942-NEXT: v_mov_b32_e32 v7, v13 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -13019,26 +13019,26 @@ define void @v_shuffle_v4i64_v4i64__7_7_3_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v6 -; GFX940-NEXT: v_mov_b32_e32 v13, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v6 +; GFX942-NEXT: v_mov_b32_e32 v13, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -13079,21 +13079,21 @@ define void @v_shuffle_v4i64_v4i64__7_7_4_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -13130,19 +13130,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_5_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -13181,20 +13181,20 @@ define void @v_shuffle_v4i64_v4i64__7_7_6_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4i64_v4i64__7_7_6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=v"() %vec1 = call <4 x i64> asm "; def $0", "=v"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -13239,17 +13239,17 @@ define void @s_shuffle_v4i64_v4i64__0_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -13283,18 +13283,18 @@ define void @s_shuffle_v4i64_v4i64__1_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -13324,18 +13324,18 @@ define void @s_shuffle_v4i64_v4i64__2_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__2_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -13369,18 +13369,18 @@ define void @s_shuffle_v4i64_v4i64__3_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__3_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -13428,18 +13428,18 @@ define void @s_shuffle_v4i64_v4i64__5_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__5_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -13470,18 +13470,18 @@ define void @s_shuffle_v4i64_v4i64__6_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__6_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -13516,18 +13516,18 @@ define void @s_shuffle_v4i64_v4i64__7_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -13572,24 +13572,24 @@ define void @s_shuffle_v4i64_v4i64__7_0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -13630,21 +13630,21 @@ define void @s_shuffle_v4i64_v4i64__7_1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -13689,23 +13689,23 @@ define void @s_shuffle_v4i64_v4i64__7_2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -13746,23 +13746,23 @@ define void @s_shuffle_v4i64_v4i64__7_3_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -13801,20 +13801,20 @@ define void @s_shuffle_v4i64_v4i64__7_4_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -13873,20 +13873,20 @@ define void @s_shuffle_v4i64_v4i64__7_6_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_6_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -13921,20 +13921,20 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -13983,23 +13983,23 @@ define void @s_shuffle_v4i64_v4i64__7_7_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -14048,23 +14048,23 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -14105,23 +14105,23 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -14166,25 +14166,25 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -14223,22 +14223,22 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -14277,22 +14277,22 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -14357,22 +14357,22 @@ define void @s_shuffle_v4i64_v4i64__7_7_7_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -14425,27 +14425,27 @@ define void @s_shuffle_v4i64_v4i64__7_7_7_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -14498,27 +14498,27 @@ define void @s_shuffle_v4i64_v4i64__7_7_7_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -14571,27 +14571,27 @@ define void @s_shuffle_v4i64_v4i64__7_7_7_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -14640,25 +14640,25 @@ define void @s_shuffle_v4i64_v4i64__7_7_7_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -14705,24 +14705,24 @@ define void @s_shuffle_v4i64_v4i64__7_7_7_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -14765,24 +14765,24 @@ define void @s_shuffle_v4i64_v4i64__7_7_7_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -14829,24 +14829,24 @@ define void @s_shuffle_v4i64_v4i64__7_7_7_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -14913,22 +14913,22 @@ define void @s_shuffle_v4i64_v4i64__u_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -14997,24 +14997,24 @@ define void @s_shuffle_v4i64_v4i64__1_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -15056,24 +15056,24 @@ define void @s_shuffle_v4i64_v4i64__2_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -15119,24 +15119,24 @@ define void @s_shuffle_v4i64_v4i64__3_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -15178,22 +15178,22 @@ define void @s_shuffle_v4i64_v4i64__4_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__4_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__4_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -15245,27 +15245,27 @@ define void @s_shuffle_v4i64_v4i64__5_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__5_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -15318,25 +15318,25 @@ define void @s_shuffle_v4i64_v4i64__6_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__6_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -15389,27 +15389,27 @@ define void @s_shuffle_v4i64_v4i64__7_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -15458,25 +15458,25 @@ define void @s_shuffle_v4i64_v4i64__7_u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -15529,27 +15529,27 @@ define void @s_shuffle_v4i64_v4i64__7_1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -15602,27 +15602,27 @@ define void @s_shuffle_v4i64_v4i64__7_2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -15671,27 +15671,27 @@ define void @s_shuffle_v4i64_v4i64__7_3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -15744,27 +15744,27 @@ define void @s_shuffle_v4i64_v4i64__7_4_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -15813,25 +15813,25 @@ define void @s_shuffle_v4i64_v4i64__7_5_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -15884,27 +15884,27 @@ define void @s_shuffle_v4i64_v4i64__7_6_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_6_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -15957,25 +15957,25 @@ define void @s_shuffle_v4i64_v4i64__7_7_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -16024,23 +16024,23 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -16093,25 +16093,25 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -16160,27 +16160,27 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -16229,27 +16229,27 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -16298,25 +16298,25 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -16369,25 +16369,25 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -16436,25 +16436,25 @@ define void @s_shuffle_v4i64_v4i64__7_7_6_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -16635,25 +16635,25 @@ define void @s_shuffle_v4i64_v4i64__5_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__5_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: s_mov_b32 s14, s10 -; GFX940-NEXT: s_mov_b32 s15, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b32 s14, s10 +; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -16702,25 +16702,25 @@ define void @s_shuffle_v4i64_v4i64__6_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__6_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: s_mov_b32 s14, s10 -; GFX940-NEXT: s_mov_b32 s15, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b32 s14, s10 +; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -16769,25 +16769,25 @@ define void @s_shuffle_v4i64_v4i64__7_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: s_mov_b32 s14, s10 -; GFX940-NEXT: s_mov_b32 s15, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b32 s14, s10 +; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -16836,25 +16836,25 @@ define void @s_shuffle_v4i64_v4i64__7_u_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -16907,27 +16907,27 @@ define void @s_shuffle_v4i64_v4i64__7_0_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -16976,27 +16976,27 @@ define void @s_shuffle_v4i64_v4i64__7_2_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -17045,27 +17045,27 @@ define void @s_shuffle_v4i64_v4i64__7_3_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -17118,27 +17118,27 @@ define void @s_shuffle_v4i64_v4i64__7_4_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -17187,25 +17187,25 @@ define void @s_shuffle_v4i64_v4i64__7_5_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -17258,27 +17258,27 @@ define void @s_shuffle_v4i64_v4i64__7_6_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_6_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -17331,25 +17331,25 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -17398,23 +17398,23 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -17467,25 +17467,25 @@ define void @s_shuffle_v4i64_v4i64__7_7_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -17530,27 +17530,27 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -17595,27 +17595,27 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -17664,25 +17664,25 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -17735,25 +17735,25 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -17802,25 +17802,25 @@ define void @s_shuffle_v4i64_v4i64__7_7_6_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -18001,25 +18001,25 @@ define void @s_shuffle_v4i64_v4i64__5_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__5_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -18064,25 +18064,25 @@ define void @s_shuffle_v4i64_v4i64__6_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__6_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -18131,25 +18131,25 @@ define void @s_shuffle_v4i64_v4i64__7_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -18194,23 +18194,23 @@ define void @s_shuffle_v4i64_v4i64__7_u_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -18263,27 +18263,27 @@ define void @s_shuffle_v4i64_v4i64__7_0_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -18328,23 +18328,23 @@ define void @s_shuffle_v4i64_v4i64__7_1_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -18393,25 +18393,25 @@ define void @s_shuffle_v4i64_v4i64__7_3_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -18460,25 +18460,25 @@ define void @s_shuffle_v4i64_v4i64__7_4_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -18527,25 +18527,25 @@ define void @s_shuffle_v4i64_v4i64__7_5_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -18594,25 +18594,25 @@ define void @s_shuffle_v4i64_v4i64__7_6_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_6_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -18657,25 +18657,25 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -18720,25 +18720,25 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -18783,27 +18783,27 @@ define void @s_shuffle_v4i64_v4i64__7_7_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -18852,27 +18852,27 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -18921,27 +18921,27 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -18990,25 +18990,25 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s18 -; GFX940-NEXT: s_mov_b32 s9, s19 -; GFX940-NEXT: s_mov_b32 s10, s18 -; GFX940-NEXT: s_mov_b32 s11, s19 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: s_mov_b32 s10, s18 +; GFX942-NEXT: s_mov_b32 s11, s19 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -19057,27 +19057,27 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s18 -; GFX940-NEXT: s_mov_b32 s9, s19 -; GFX940-NEXT: s_mov_b32 s10, s18 -; GFX940-NEXT: s_mov_b32 s11, s19 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: s_mov_b32 s10, s18 +; GFX942-NEXT: s_mov_b32 s11, s19 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -19126,25 +19126,25 @@ define void @s_shuffle_v4i64_v4i64__7_7_6_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -19325,25 +19325,25 @@ define void @s_shuffle_v4i64_v4i64__5_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__5_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -19388,25 +19388,25 @@ define void @s_shuffle_v4i64_v4i64__6_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__6_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -19455,25 +19455,25 @@ define void @s_shuffle_v4i64_v4i64__7_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -19518,23 +19518,23 @@ define void @s_shuffle_v4i64_v4i64__7_u_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -19587,27 +19587,27 @@ define void @s_shuffle_v4i64_v4i64__7_0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: s_mov_b32 s14, s6 -; GFX940-NEXT: s_mov_b32 s15, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b32 s14, s6 +; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -19652,23 +19652,23 @@ define void @s_shuffle_v4i64_v4i64__7_1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -19717,25 +19717,25 @@ define void @s_shuffle_v4i64_v4i64__7_2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -19784,25 +19784,25 @@ define void @s_shuffle_v4i64_v4i64__7_4_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -19851,25 +19851,25 @@ define void @s_shuffle_v4i64_v4i64__7_5_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: s_mov_b32 s14, s6 -; GFX940-NEXT: s_mov_b32 s15, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b32 s14, s6 +; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -19918,25 +19918,25 @@ define void @s_shuffle_v4i64_v4i64__7_6_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_6_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -19981,25 +19981,25 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -20040,23 +20040,23 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -20101,27 +20101,27 @@ define void @s_shuffle_v4i64_v4i64__7_7_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s6 -; GFX940-NEXT: s_mov_b32 s15, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s6 +; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -20170,27 +20170,27 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s6 -; GFX940-NEXT: s_mov_b32 s15, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s6 +; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -20231,23 +20231,23 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -20292,25 +20292,25 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -20355,25 +20355,25 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -20422,25 +20422,25 @@ define void @s_shuffle_v4i64_v4i64__7_7_6_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s14, s6 -; GFX940-NEXT: s_mov_b32 s15, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s14, s6 +; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -20485,17 +20485,17 @@ define void @s_shuffle_v4i64_v4i64__0_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__0_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -20529,18 +20529,18 @@ define void @s_shuffle_v4i64_v4i64__1_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__1_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -20570,18 +20570,18 @@ define void @s_shuffle_v4i64_v4i64__2_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__2_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -20615,18 +20615,18 @@ define void @s_shuffle_v4i64_v4i64__3_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__3_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -20686,24 +20686,24 @@ define void @s_shuffle_v4i64_v4i64__5_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__5_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -20746,24 +20746,24 @@ define void @s_shuffle_v4i64_v4i64__6_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__6_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -20810,24 +20810,24 @@ define void @s_shuffle_v4i64_v4i64__7_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -20870,22 +20870,22 @@ define void @s_shuffle_v4i64_v4i64__7_u_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -20938,28 +20938,28 @@ define void @s_shuffle_v4i64_v4i64__7_0_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -21008,25 +21008,25 @@ define void @s_shuffle_v4i64_v4i64__7_1_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -21079,27 +21079,27 @@ define void @s_shuffle_v4i64_v4i64__7_2_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s22 -; GFX940-NEXT: s_mov_b32 s9, s23 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s16 -; GFX940-NEXT: s_mov_b32 s13, s17 -; GFX940-NEXT: s_mov_b32 s14, s16 -; GFX940-NEXT: s_mov_b32 s15, s17 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s22 +; GFX942-NEXT: s_mov_b32 s9, s23 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s16 +; GFX942-NEXT: s_mov_b32 s13, s17 +; GFX942-NEXT: s_mov_b32 s14, s16 +; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -21148,27 +21148,27 @@ define void @s_shuffle_v4i64_v4i64__7_3_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s22 -; GFX940-NEXT: s_mov_b32 s9, s23 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s16 -; GFX940-NEXT: s_mov_b32 s13, s17 -; GFX940-NEXT: s_mov_b32 s14, s16 -; GFX940-NEXT: s_mov_b32 s15, s17 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s22 +; GFX942-NEXT: s_mov_b32 s9, s23 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s16 +; GFX942-NEXT: s_mov_b32 s13, s17 +; GFX942-NEXT: s_mov_b32 s14, s16 +; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -21215,24 +21215,24 @@ define void @s_shuffle_v4i64_v4i64__7_5_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -21279,24 +21279,24 @@ define void @s_shuffle_v4i64_v4i64__7_6_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_6_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -21339,24 +21339,24 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -21395,22 +21395,22 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -21463,25 +21463,25 @@ define void @s_shuffle_v4i64_v4i64__7_7_0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -21534,25 +21534,25 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -21597,25 +21597,25 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -21664,27 +21664,27 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s22 -; GFX940-NEXT: s_mov_b32 s9, s23 -; GFX940-NEXT: s_mov_b32 s10, s22 -; GFX940-NEXT: s_mov_b32 s11, s23 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: s_mov_b32 s14, s16 -; GFX940-NEXT: s_mov_b32 s15, s17 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s22 +; GFX942-NEXT: s_mov_b32 s9, s23 +; GFX942-NEXT: s_mov_b32 s10, s22 +; GFX942-NEXT: s_mov_b32 s11, s23 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b32 s14, s16 +; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -21727,24 +21727,24 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -21791,24 +21791,24 @@ define void @s_shuffle_v4i64_v4i64__7_7_6_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -21875,25 +21875,25 @@ define void @s_shuffle_v4i64_v4i64__0_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__0_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -21942,25 +21942,25 @@ define void @s_shuffle_v4i64_v4i64__1_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__1_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: s_mov_b32 s14, s10 -; GFX940-NEXT: s_mov_b32 s15, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b32 s14, s10 +; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -22009,25 +22009,25 @@ define void @s_shuffle_v4i64_v4i64__2_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__2_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: s_mov_b32 s14, s10 -; GFX940-NEXT: s_mov_b32 s15, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b32 s14, s10 +; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -22076,25 +22076,25 @@ define void @s_shuffle_v4i64_v4i64__3_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__3_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: s_mov_b32 s14, s10 -; GFX940-NEXT: s_mov_b32 s15, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b32 s14, s10 +; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -22231,22 +22231,22 @@ define void @s_shuffle_v4i64_v4i64__7_u_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -22295,28 +22295,28 @@ define void @s_shuffle_v4i64_v4i64__7_0_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: s_mov_b32 s14, s6 -; GFX940-NEXT: s_mov_b32 s15, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b32 s14, s6 +; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -22361,25 +22361,25 @@ define void @s_shuffle_v4i64_v4i64__7_1_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -22432,25 +22432,25 @@ define void @s_shuffle_v4i64_v4i64__7_2_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s18 -; GFX940-NEXT: s_mov_b32 s9, s19 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -22495,25 +22495,25 @@ define void @s_shuffle_v4i64_v4i64__7_3_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s18 -; GFX940-NEXT: s_mov_b32 s9, s19 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -22560,24 +22560,24 @@ define void @s_shuffle_v4i64_v4i64__7_4_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -22620,24 +22620,24 @@ define void @s_shuffle_v4i64_v4i64__7_6_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_6_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -22680,24 +22680,24 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -22736,22 +22736,22 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -22800,25 +22800,25 @@ define void @s_shuffle_v4i64_v4i64__7_7_0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s6 -; GFX940-NEXT: s_mov_b32 s15, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s6 +; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -22867,25 +22867,25 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s6 -; GFX940-NEXT: s_mov_b32 s15, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s6 +; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -22930,25 +22930,25 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -22997,25 +22997,25 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s18 -; GFX940-NEXT: s_mov_b32 s9, s19 -; GFX940-NEXT: s_mov_b32 s10, s18 -; GFX940-NEXT: s_mov_b32 s11, s19 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: s_mov_b32 s10, s18 +; GFX942-NEXT: s_mov_b32 s11, s19 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -23058,24 +23058,24 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -23118,24 +23118,24 @@ define void @s_shuffle_v4i64_v4i64__7_7_6_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -23206,25 +23206,25 @@ define void @s_shuffle_v4i64_v4i64__0_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__0_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -23273,25 +23273,25 @@ define void @s_shuffle_v4i64_v4i64__1_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__1_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -23340,25 +23340,25 @@ define void @s_shuffle_v4i64_v4i64__2_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__2_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -23407,25 +23407,25 @@ define void @s_shuffle_v4i64_v4i64__3_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__3_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -23590,25 +23590,25 @@ define void @s_shuffle_v4i64_v4i64__7_0_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_0_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -23657,25 +23657,25 @@ define void @s_shuffle_v4i64_v4i64__7_1_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_1_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -23724,25 +23724,25 @@ define void @s_shuffle_v4i64_v4i64__7_2_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_2_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -23791,25 +23791,25 @@ define void @s_shuffle_v4i64_v4i64__7_3_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_3_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -23856,24 +23856,24 @@ define void @s_shuffle_v4i64_v4i64__7_4_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_4_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -23962,22 +23962,22 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -24030,27 +24030,27 @@ define void @s_shuffle_v4i64_v4i64__7_7_0_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s18 -; GFX940-NEXT: s_mov_b32 s9, s19 -; GFX940-NEXT: s_mov_b32 s10, s18 -; GFX940-NEXT: s_mov_b32 s11, s19 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s16 -; GFX940-NEXT: s_mov_b32 s15, s17 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: s_mov_b32 s10, s18 +; GFX942-NEXT: s_mov_b32 s11, s19 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s16 +; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -24103,27 +24103,27 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s18 -; GFX940-NEXT: s_mov_b32 s9, s19 -; GFX940-NEXT: s_mov_b32 s10, s18 -; GFX940-NEXT: s_mov_b32 s11, s19 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s16 -; GFX940-NEXT: s_mov_b32 s15, s17 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: s_mov_b32 s10, s18 +; GFX942-NEXT: s_mov_b32 s11, s19 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s16 +; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -24172,25 +24172,25 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -24243,27 +24243,27 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s18 -; GFX940-NEXT: s_mov_b32 s9, s19 -; GFX940-NEXT: s_mov_b32 s10, s18 -; GFX940-NEXT: s_mov_b32 s11, s19 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: s_mov_b32 s14, s16 -; GFX940-NEXT: s_mov_b32 s15, s17 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: s_mov_b32 s10, s18 +; GFX942-NEXT: s_mov_b32 s11, s19 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b32 s14, s16 +; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -24306,24 +24306,24 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -24370,24 +24370,24 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -24458,25 +24458,25 @@ define void @s_shuffle_v4i64_v4i64__0_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__0_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: s_mov_b32 s14, s6 -; GFX940-NEXT: s_mov_b32 s15, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b32 s14, s6 +; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -24525,25 +24525,25 @@ define void @s_shuffle_v4i64_v4i64__1_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__1_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -24592,25 +24592,25 @@ define void @s_shuffle_v4i64_v4i64__2_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__2_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -24659,25 +24659,25 @@ define void @s_shuffle_v4i64_v4i64__3_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__3_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -24818,25 +24818,25 @@ define void @s_shuffle_v4i64_v4i64__7_0_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_0_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -24885,25 +24885,25 @@ define void @s_shuffle_v4i64_v4i64__7_1_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_1_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: s_mov_b32 s14, s6 -; GFX940-NEXT: s_mov_b32 s15, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b32 s14, s6 +; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -24952,25 +24952,25 @@ define void @s_shuffle_v4i64_v4i64__7_2_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_2_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -25019,25 +25019,25 @@ define void @s_shuffle_v4i64_v4i64__7_3_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_3_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -25084,24 +25084,24 @@ define void @s_shuffle_v4i64_v4i64__7_4_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_4_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: s_mov_b32 s14, s6 -; GFX940-NEXT: s_mov_b32 s15, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b32 s14, s6 +; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -25218,25 +25218,25 @@ define void @s_shuffle_v4i64_v4i64__7_7_0_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -25285,25 +25285,25 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -25352,25 +25352,25 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s14, s6 -; GFX940-NEXT: s_mov_b32 s15, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s14, s6 +; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -25419,25 +25419,25 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -25480,24 +25480,24 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s6 -; GFX940-NEXT: s_mov_b32 s15, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s6 +; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -25544,24 +25544,24 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4i64_v4i64__7_7_5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s6 -; GFX940-NEXT: s_mov_b32 s15, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s6 +; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v2p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v2p0.ll index ac9124d352799..b30af835a7882 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v2p0.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v2p0.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v4p0_v2p0__u_u_u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v4p0_v2p0__0_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -81,18 +81,18 @@ define void @v_shuffle_v4p0_v2p0__1_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -137,18 +137,18 @@ define void @v_shuffle_v4p0_v2p0__3_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -193,24 +193,24 @@ define void @v_shuffle_v4p0_v2p0__3_0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -251,22 +251,22 @@ define void @v_shuffle_v4p0_v2p0__3_1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -301,19 +301,19 @@ define void @v_shuffle_v4p0_v2p0__3_2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -348,18 +348,18 @@ define void @v_shuffle_v4p0_v2p0__3_3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -404,23 +404,23 @@ define void @v_shuffle_v4p0_v2p0__3_3_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_3_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -467,24 +467,24 @@ define void @v_shuffle_v4p0_v2p0__3_3_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_3_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -523,20 +523,20 @@ define void @v_shuffle_v4p0_v2p0__3_3_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_3_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -573,19 +573,19 @@ define void @v_shuffle_v4p0_v2p0__3_3_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_3_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -636,26 +636,26 @@ define void @v_shuffle_v4p0_v2p0__3_3_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_3_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, v0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v9, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, v0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v9, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -702,24 +702,24 @@ define void @v_shuffle_v4p0_v2p0__3_3_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_3_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -764,23 +764,23 @@ define void @v_shuffle_v4p0_v2p0__3_3_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_3_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -817,19 +817,19 @@ define void @v_shuffle_v4p0_v2p0__3_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -866,19 +866,19 @@ define void @v_shuffle_v4p0_v2p0__u_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -914,19 +914,19 @@ define void @v_shuffle_v4p0_v2p0__0_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__0_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> zeroinitializer store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -966,22 +966,22 @@ define void @v_shuffle_v4p0_v2p0__1_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1017,19 +1017,19 @@ define void @v_shuffle_v4p0_v2p0__2_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1081,28 +1081,28 @@ define void @v_shuffle_v4p0_v2p0__3_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -1151,26 +1151,26 @@ define void @v_shuffle_v4p0_v2p0__3_u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -1221,26 +1221,26 @@ define void @v_shuffle_v4p0_v2p0__3_1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, v0 -; GFX940-NEXT: v_mov_b32_e32 v9, v1 -; GFX940-NEXT: v_mov_b32_e32 v10, v0 -; GFX940-NEXT: v_mov_b32_e32 v11, v1 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, v0 +; GFX942-NEXT: v_mov_b32_e32 v9, v1 +; GFX942-NEXT: v_mov_b32_e32 v10, v0 +; GFX942-NEXT: v_mov_b32_e32 v11, v1 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -1293,27 +1293,27 @@ define void @v_shuffle_v4p0_v2p0__3_2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -1362,26 +1362,26 @@ define void @v_shuffle_v4p0_v2p0__3_3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -1430,26 +1430,26 @@ define void @v_shuffle_v4p0_v2p0__3_3_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_3_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -1498,26 +1498,26 @@ define void @v_shuffle_v4p0_v2p0__3_3_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_3_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -1568,26 +1568,26 @@ define void @v_shuffle_v4p0_v2p0__3_3_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_3_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, v0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: v_mov_b32_e32 v9, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, v0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_mov_b32_e32 v9, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -1624,19 +1624,19 @@ define void @v_shuffle_v4p0_v2p0__u_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__u_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__u_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1676,22 +1676,22 @@ define void @v_shuffle_v4p0_v2p0__0_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__0_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__0_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1727,19 +1727,19 @@ define void @v_shuffle_v4p0_v2p0__1_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__1_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__1_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1775,19 +1775,19 @@ define void @v_shuffle_v4p0_v2p0__2_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__2_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__2_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1835,25 +1835,25 @@ define void @v_shuffle_v4p0_v2p0__3_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -1902,25 +1902,25 @@ define void @v_shuffle_v4p0_v2p0__3_u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -1975,28 +1975,28 @@ define void @v_shuffle_v4p0_v2p0__3_0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v2 -; GFX940-NEXT: v_mov_b32_e32 v9, v3 -; GFX940-NEXT: v_mov_b32_e32 v10, v2 -; GFX940-NEXT: v_mov_b32_e32 v11, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v2 +; GFX942-NEXT: v_mov_b32_e32 v9, v3 +; GFX942-NEXT: v_mov_b32_e32 v10, v2 +; GFX942-NEXT: v_mov_b32_e32 v11, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -2049,27 +2049,27 @@ define void @v_shuffle_v4p0_v2p0__3_2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -2116,24 +2116,24 @@ define void @v_shuffle_v4p0_v2p0__3_3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -2176,22 +2176,22 @@ define void @v_shuffle_v4p0_v2p0__3_3_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_3_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -2234,22 +2234,22 @@ define void @v_shuffle_v4p0_v2p0__3_3_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_3_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -2296,24 +2296,24 @@ define void @v_shuffle_v4p0_v2p0__3_3_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_3_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -2355,16 +2355,16 @@ define void @v_shuffle_v4p0_v2p0__0_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__0_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2398,18 +2398,18 @@ define void @v_shuffle_v4p0_v2p0__1_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__1_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2460,22 +2460,22 @@ define void @v_shuffle_v4p0_v2p0__3_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -2520,23 +2520,23 @@ define void @v_shuffle_v4p0_v2p0__3_u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -2591,29 +2591,29 @@ define void @v_shuffle_v4p0_v2p0__3_0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: v_mov_b32_e32 v8, v2 -; GFX940-NEXT: v_mov_b32_e32 v9, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_mov_b32_e32 v8, v2 +; GFX942-NEXT: v_mov_b32_e32 v9, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -2664,27 +2664,27 @@ define void @v_shuffle_v4p0_v2p0__3_1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v4 -; GFX940-NEXT: v_mov_b32_e32 v9, v5 -; GFX940-NEXT: v_mov_b32_e32 v10, v4 -; GFX940-NEXT: v_mov_b32_e32 v11, v5 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v4 +; GFX942-NEXT: v_mov_b32_e32 v9, v5 +; GFX942-NEXT: v_mov_b32_e32 v10, v4 +; GFX942-NEXT: v_mov_b32_e32 v11, v5 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -2729,23 +2729,23 @@ define void @v_shuffle_v4p0_v2p0__3_3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -2786,21 +2786,21 @@ define void @v_shuffle_v4p0_v2p0__3_3_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_3_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -2845,23 +2845,23 @@ define void @v_shuffle_v4p0_v2p0__3_3_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_3_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -2912,26 +2912,26 @@ define void @v_shuffle_v4p0_v2p0__3_3_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_3_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -2968,19 +2968,19 @@ define void @v_shuffle_v4p0_v2p0__u_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__u_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__u_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -3023,23 +3023,23 @@ define void @v_shuffle_v4p0_v2p0__0_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__0_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -3088,25 +3088,25 @@ define void @v_shuffle_v4p0_v2p0__1_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -3147,22 +3147,22 @@ define void @v_shuffle_v4p0_v2p0__2_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__2_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -3199,19 +3199,19 @@ define void @v_shuffle_v4p0_v2p0__3_u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -3260,26 +3260,26 @@ define void @v_shuffle_v4p0_v2p0__3_0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -3326,24 +3326,24 @@ define void @v_shuffle_v4p0_v2p0__3_1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -3390,25 +3390,25 @@ define void @v_shuffle_v4p0_v2p0__3_2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -3447,20 +3447,20 @@ define void @v_shuffle_v4p0_v2p0__3_3_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_3_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -3503,23 +3503,23 @@ define void @v_shuffle_v4p0_v2p0__3_3_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_3_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -3568,26 +3568,26 @@ define void @v_shuffle_v4p0_v2p0__3_3_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_3_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -3626,20 +3626,20 @@ define void @v_shuffle_v4p0_v2p0__3_3_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v2p0__3_3_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=v"() %vec1 = call <2 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -3684,17 +3684,17 @@ define void @s_shuffle_v4p0_v2p0__0_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v2p0__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v2p0__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -3728,18 +3728,18 @@ define void @s_shuffle_v4p0_v2p0__1_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v2p0__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v2p0__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -3787,18 +3787,18 @@ define void @s_shuffle_v4p0_v2p0__3_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v2p0__3_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -3843,23 +3843,23 @@ define void @s_shuffle_v4p0_v2p0__3_0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v2p0__3_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -3900,21 +3900,21 @@ define void @s_shuffle_v4p0_v2p0__3_1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v2p0__3_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -3953,20 +3953,20 @@ define void @s_shuffle_v4p0_v2p0__3_2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v2p0__3_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -4054,23 +4054,23 @@ define void @s_shuffle_v4p0_v2p0__3_3_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v2p0__3_3_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -4163,25 +4163,25 @@ define void @s_shuffle_v4p0_v2p0__3_3_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v2p0__3_3_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -4253,24 +4253,24 @@ define void @s_shuffle_v4p0_v2p0__3_3_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v2p0__3_3_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -4431,25 +4431,25 @@ define void @s_shuffle_v4p0_v2p0__3_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v2p0__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -4494,23 +4494,23 @@ define void @s_shuffle_v4p0_v2p0__3_u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v2p0__3_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -4559,25 +4559,25 @@ define void @s_shuffle_v4p0_v2p0__3_1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v2p0__3_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -4626,25 +4626,25 @@ define void @s_shuffle_v4p0_v2p0__3_2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v2p0__3_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -4714,23 +4714,23 @@ define void @s_shuffle_v4p0_v2p0__3_3_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v2p0__3_3_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -4779,25 +4779,25 @@ define void @s_shuffle_v4p0_v2p0__3_3_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v2p0__3_3_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -4846,25 +4846,25 @@ define void @s_shuffle_v4p0_v2p0__3_3_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v2p0__3_3_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -4999,25 +4999,25 @@ define void @s_shuffle_v4p0_v2p0__3_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v2p0__3_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: s_mov_b32 s14, s10 -; GFX940-NEXT: s_mov_b32 s15, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b32 s14, s10 +; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -5062,23 +5062,23 @@ define void @s_shuffle_v4p0_v2p0__3_u_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v2p0__3_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -5127,25 +5127,25 @@ define void @s_shuffle_v4p0_v2p0__3_0_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v2p0__3_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -5194,25 +5194,25 @@ define void @s_shuffle_v4p0_v2p0__3_2_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v2p0__3_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -5332,25 +5332,25 @@ define void @s_shuffle_v4p0_v2p0__3_3_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v2p0__3_3_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -5395,17 +5395,17 @@ define void @s_shuffle_v4p0_v2p0__0_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v2p0__0_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v2p0__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -5439,18 +5439,18 @@ define void @s_shuffle_v4p0_v2p0__1_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v2p0__1_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v2p0__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -5558,25 +5558,25 @@ define void @s_shuffle_v4p0_v2p0__3_0_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v2p0__3_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -5668,22 +5668,22 @@ define void @s_shuffle_v4p0_v2p0__3_3_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v2p0__3_3_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -5732,25 +5732,25 @@ define void @s_shuffle_v4p0_v2p0__3_3_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v2p0__3_3_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -5803,27 +5803,27 @@ define void @s_shuffle_v4p0_v2p0__3_3_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v2p0__3_3_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -5919,25 +5919,25 @@ define void @s_shuffle_v4p0_v2p0__1_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v2p0__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: s_mov_b32 s14, s10 -; GFX940-NEXT: s_mov_b32 s15, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v2p0__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b32 s14, s10 +; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -6030,25 +6030,25 @@ define void @s_shuffle_v4p0_v2p0__3_0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v2p0__3_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -6193,25 +6193,25 @@ define void @s_shuffle_v4p0_v2p0__3_3_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v2p0__3_3_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s10 -; GFX940-NEXT: s_mov_b32 s15, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s10 +; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll index 7a509ffb8c159..e6ac554735eee 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v4p0_v3p0__u_u_u_u(ptr addrspace(1) inreg %ptr) { @@ -40,17 +40,17 @@ define void @v_shuffle_v4p0_v3p0__0_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -80,16 +80,16 @@ define void @v_shuffle_v4p0_v3p0__1_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -123,18 +123,18 @@ define void @v_shuffle_v4p0_v3p0__2_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__2_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__2_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -175,16 +175,16 @@ define void @v_shuffle_v4p0_v3p0__4_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__4_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__4_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -219,18 +219,18 @@ define void @v_shuffle_v4p0_v3p0__5_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -275,24 +275,24 @@ define void @v_shuffle_v4p0_v3p0__5_0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -333,22 +333,22 @@ define void @v_shuffle_v4p0_v3p0__5_1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -389,22 +389,22 @@ define void @v_shuffle_v4p0_v3p0__5_2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -443,20 +443,20 @@ define void @v_shuffle_v4p0_v3p0__5_3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -491,18 +491,18 @@ define void @v_shuffle_v4p0_v3p0__5_4_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -537,18 +537,18 @@ define void @v_shuffle_v4p0_v3p0__5_5_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -591,22 +591,22 @@ define void @v_shuffle_v4p0_v3p0__5_5_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -649,22 +649,22 @@ define void @v_shuffle_v4p0_v3p0__5_5_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -711,24 +711,24 @@ define void @v_shuffle_v4p0_v3p0__5_5_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -767,20 +767,20 @@ define void @v_shuffle_v4p0_v3p0__5_5_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -819,20 +819,20 @@ define void @v_shuffle_v4p0_v3p0__5_5_4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -875,22 +875,22 @@ define void @v_shuffle_v4p0_v3p0__5_5_5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -943,28 +943,28 @@ define void @v_shuffle_v4p0_v3p0__5_5_5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -1011,25 +1011,25 @@ define void @v_shuffle_v4p0_v3p0__5_5_5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -1076,24 +1076,24 @@ define void @v_shuffle_v4p0_v3p0__5_5_5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -1138,23 +1138,23 @@ define void @v_shuffle_v4p0_v3p0__5_5_5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v8, v0 -; GFX940-NEXT: v_mov_b32_e32 v9, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v8, v0 +; GFX942-NEXT: v_mov_b32_e32 v9, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -1197,22 +1197,22 @@ define void @v_shuffle_v4p0_v3p0__5_5_5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -1249,19 +1249,19 @@ define void @v_shuffle_v4p0_v3p0__5_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -1298,19 +1298,19 @@ define void @v_shuffle_v4p0_v3p0__u_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1346,19 +1346,19 @@ define void @v_shuffle_v4p0_v3p0__0_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__0_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> zeroinitializer store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1398,21 +1398,21 @@ define void @v_shuffle_v4p0_v3p0__1_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1458,24 +1458,24 @@ define void @v_shuffle_v4p0_v3p0__2_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1511,19 +1511,19 @@ define void @v_shuffle_v4p0_v3p0__3_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1569,25 +1569,25 @@ define void @v_shuffle_v4p0_v3p0__4_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__4_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__4_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -1638,27 +1638,27 @@ define void @v_shuffle_v4p0_v3p0__5_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -1707,26 +1707,26 @@ define void @v_shuffle_v4p0_v3p0__5_u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -1777,27 +1777,27 @@ define void @v_shuffle_v4p0_v3p0__5_1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -1846,25 +1846,25 @@ define void @v_shuffle_v4p0_v3p0__5_2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -1917,27 +1917,27 @@ define void @v_shuffle_v4p0_v3p0__5_3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -1986,26 +1986,26 @@ define void @v_shuffle_v4p0_v3p0__5_4_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -2052,25 +2052,25 @@ define void @v_shuffle_v4p0_v3p0__5_5_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -2117,25 +2117,25 @@ define void @v_shuffle_v4p0_v3p0__5_5_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -2182,25 +2182,25 @@ define void @v_shuffle_v4p0_v3p0__5_5_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -2251,26 +2251,26 @@ define void @v_shuffle_v4p0_v3p0__5_5_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -2319,26 +2319,26 @@ define void @v_shuffle_v4p0_v3p0__5_5_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -2391,28 +2391,28 @@ define void @v_shuffle_v4p0_v3p0__5_5_4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -2449,19 +2449,19 @@ define void @v_shuffle_v4p0_v3p0__u_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__u_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__u_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2497,19 +2497,19 @@ define void @v_shuffle_v4p0_v3p0__0_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__0_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__0_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2545,19 +2545,19 @@ define void @v_shuffle_v4p0_v3p0__1_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__1_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__1_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2599,22 +2599,22 @@ define void @v_shuffle_v4p0_v3p0__2_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__2_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__2_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2650,19 +2650,19 @@ define void @v_shuffle_v4p0_v3p0__3_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__3_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2708,25 +2708,25 @@ define void @v_shuffle_v4p0_v3p0__4_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__4_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v8, v2 -; GFX940-NEXT: v_mov_b32_e32 v9, v3 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__4_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v8, v2 +; GFX942-NEXT: v_mov_b32_e32 v9, v3 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -2773,25 +2773,25 @@ define void @v_shuffle_v4p0_v3p0__5_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -2838,25 +2838,25 @@ define void @v_shuffle_v4p0_v3p0__5_u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -2909,28 +2909,28 @@ define void @v_shuffle_v4p0_v3p0__5_0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -2979,25 +2979,25 @@ define void @v_shuffle_v4p0_v3p0__5_2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -3048,26 +3048,26 @@ define void @v_shuffle_v4p0_v3p0__5_3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -3116,26 +3116,26 @@ define void @v_shuffle_v4p0_v3p0__5_4_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v8 -; GFX940-NEXT: v_mov_b32_e32 v5, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v8 +; GFX942-NEXT: v_mov_b32_e32 v5, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -3182,25 +3182,25 @@ define void @v_shuffle_v4p0_v3p0__5_5_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -3243,22 +3243,22 @@ define void @v_shuffle_v4p0_v3p0__5_5_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -3301,22 +3301,22 @@ define void @v_shuffle_v4p0_v3p0__5_5_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -3363,24 +3363,24 @@ define void @v_shuffle_v4p0_v3p0__5_5_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -3429,26 +3429,26 @@ define void @v_shuffle_v4p0_v3p0__5_5_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -3495,25 +3495,25 @@ define void @v_shuffle_v4p0_v3p0__5_5_4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -3550,19 +3550,19 @@ define void @v_shuffle_v4p0_v3p0__u_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__u_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__u_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3598,19 +3598,19 @@ define void @v_shuffle_v4p0_v3p0__0_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__0_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3650,22 +3650,22 @@ define void @v_shuffle_v4p0_v3p0__1_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__1_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v8, v4 -; GFX940-NEXT: v_mov_b32_e32 v9, v5 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v8, v4 +; GFX942-NEXT: v_mov_b32_e32 v9, v5 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3701,19 +3701,19 @@ define void @v_shuffle_v4p0_v3p0__2_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__2_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__2_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3749,19 +3749,19 @@ define void @v_shuffle_v4p0_v3p0__3_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__3_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__3_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3807,24 +3807,24 @@ define void @v_shuffle_v4p0_v3p0__4_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__4_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v10, v4 -; GFX940-NEXT: v_mov_b32_e32 v11, v5 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__4_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v10, v4 +; GFX942-NEXT: v_mov_b32_e32 v11, v5 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -3873,25 +3873,25 @@ define void @v_shuffle_v4p0_v3p0__5_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -3938,24 +3938,24 @@ define void @v_shuffle_v4p0_v3p0__5_u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -4008,27 +4008,27 @@ define void @v_shuffle_v4p0_v3p0__5_0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -4079,26 +4079,26 @@ define void @v_shuffle_v4p0_v3p0__5_1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v8, v4 -; GFX940-NEXT: v_mov_b32_e32 v9, v5 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v8, v4 +; GFX942-NEXT: v_mov_b32_e32 v9, v5 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -4149,26 +4149,26 @@ define void @v_shuffle_v4p0_v3p0__5_3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -4215,24 +4215,24 @@ define void @v_shuffle_v4p0_v3p0__5_4_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v6, v10 -; GFX940-NEXT: v_mov_b32_e32 v7, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v6, v10 +; GFX942-NEXT: v_mov_b32_e32 v7, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -4279,24 +4279,24 @@ define void @v_shuffle_v4p0_v3p0__5_5_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -4339,22 +4339,22 @@ define void @v_shuffle_v4p0_v3p0__5_5_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -4401,24 +4401,24 @@ define void @v_shuffle_v4p0_v3p0__5_5_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -4461,22 +4461,22 @@ define void @v_shuffle_v4p0_v3p0__5_5_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -4525,26 +4525,26 @@ define void @v_shuffle_v4p0_v3p0__5_5_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v4 -; GFX940-NEXT: v_mov_b32_e32 v9, v5 -; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v4 +; GFX942-NEXT: v_mov_b32_e32 v9, v5 +; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -4591,24 +4591,24 @@ define void @v_shuffle_v4p0_v3p0__5_5_4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -4652,17 +4652,17 @@ define void @v_shuffle_v4p0_v3p0__0_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__0_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -4692,16 +4692,16 @@ define void @v_shuffle_v4p0_v3p0__1_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -4735,18 +4735,18 @@ define void @v_shuffle_v4p0_v3p0__2_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__2_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -4797,21 +4797,21 @@ define void @v_shuffle_v4p0_v3p0__4_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__4_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__4_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -4858,24 +4858,24 @@ define void @v_shuffle_v4p0_v3p0__5_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -4918,22 +4918,22 @@ define void @v_shuffle_v4p0_v3p0__5_u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -4986,28 +4986,28 @@ define void @v_shuffle_v4p0_v3p0__5_0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -5054,25 +5054,25 @@ define void @v_shuffle_v4p0_v3p0__5_1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -5119,24 +5119,24 @@ define void @v_shuffle_v4p0_v3p0__5_2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v6 -; GFX940-NEXT: v_mov_b32_e32 v9, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v6 +; GFX942-NEXT: v_mov_b32_e32 v9, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -5181,23 +5181,23 @@ define void @v_shuffle_v4p0_v3p0__5_4_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: v_mov_b32_e32 v8, v0 -; GFX940-NEXT: v_mov_b32_e32 v9, v1 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: v_mov_b32_e32 v8, v0 +; GFX942-NEXT: v_mov_b32_e32 v9, v1 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -5240,22 +5240,22 @@ define void @v_shuffle_v4p0_v3p0__5_5_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -5298,22 +5298,22 @@ define void @v_shuffle_v4p0_v3p0__5_5_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -5356,22 +5356,22 @@ define void @v_shuffle_v4p0_v3p0__5_5_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -5414,22 +5414,22 @@ define void @v_shuffle_v4p0_v3p0__5_5_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -5480,26 +5480,26 @@ define void @v_shuffle_v4p0_v3p0__5_5_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -5544,23 +5544,23 @@ define void @v_shuffle_v4p0_v3p0__5_5_4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: v_mov_b32_e32 v8, v0 -; GFX940-NEXT: v_mov_b32_e32 v9, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_mov_b32_e32 v8, v0 +; GFX942-NEXT: v_mov_b32_e32 v9, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -5597,19 +5597,19 @@ define void @v_shuffle_v4p0_v3p0__u_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__u_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__u_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -5656,25 +5656,25 @@ define void @v_shuffle_v4p0_v3p0__0_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__0_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__0_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -5721,25 +5721,25 @@ define void @v_shuffle_v4p0_v3p0__1_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__1_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v6 -; GFX940-NEXT: v_mov_b32_e32 v9, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__1_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v6 +; GFX942-NEXT: v_mov_b32_e32 v9, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -5786,24 +5786,24 @@ define void @v_shuffle_v4p0_v3p0__2_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__2_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, v8 -; GFX940-NEXT: v_mov_b32_e32 v11, v9 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__2_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, v8 +; GFX942-NEXT: v_mov_b32_e32 v11, v9 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -5840,19 +5840,19 @@ define void @v_shuffle_v4p0_v3p0__3_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__3_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__3_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -5889,19 +5889,19 @@ define void @v_shuffle_v4p0_v3p0__4_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__4_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__4_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -5944,22 +5944,22 @@ define void @v_shuffle_v4p0_v3p0__5_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -6002,22 +6002,22 @@ define void @v_shuffle_v4p0_v3p0__5_u_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -6070,28 +6070,28 @@ define void @v_shuffle_v4p0_v3p0__5_0_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -6138,25 +6138,25 @@ define void @v_shuffle_v4p0_v3p0__5_1_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -6203,24 +6203,24 @@ define void @v_shuffle_v4p0_v3p0__5_2_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -6269,25 +6269,25 @@ define void @v_shuffle_v4p0_v3p0__5_3_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: v_mov_b32_e32 v8, v2 -; GFX940-NEXT: v_mov_b32_e32 v9, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_mov_b32_e32 v8, v2 +; GFX942-NEXT: v_mov_b32_e32 v9, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -6330,22 +6330,22 @@ define void @v_shuffle_v4p0_v3p0__5_5_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -6384,20 +6384,20 @@ define void @v_shuffle_v4p0_v3p0__5_5_u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -6444,25 +6444,25 @@ define void @v_shuffle_v4p0_v3p0__5_5_0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -6509,25 +6509,25 @@ define void @v_shuffle_v4p0_v3p0__5_5_1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -6576,26 +6576,26 @@ define void @v_shuffle_v4p0_v3p0__5_5_2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -6634,20 +6634,20 @@ define void @v_shuffle_v4p0_v3p0__5_5_3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -6684,19 +6684,19 @@ define void @v_shuffle_v4p0_v3p0__u_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__u_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__u_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -6743,25 +6743,25 @@ define void @v_shuffle_v4p0_v3p0__0_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__0_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__0_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -6808,25 +6808,25 @@ define void @v_shuffle_v4p0_v3p0__1_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__1_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: v_mov_b32_e32 v4, v8 -; GFX940-NEXT: v_mov_b32_e32 v5, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__1_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: v_mov_b32_e32 v4, v8 +; GFX942-NEXT: v_mov_b32_e32 v5, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -6875,25 +6875,25 @@ define void @v_shuffle_v4p0_v3p0__2_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__2_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v8, v4 -; GFX940-NEXT: v_mov_b32_e32 v9, v5 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__2_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v8, v4 +; GFX942-NEXT: v_mov_b32_e32 v9, v5 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -6930,19 +6930,19 @@ define void @v_shuffle_v4p0_v3p0__3_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__3_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__3_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -6983,22 +6983,22 @@ define void @v_shuffle_v4p0_v3p0__4_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__4_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v8, v4 -; GFX940-NEXT: v_mov_b32_e32 v9, v5 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__4_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v8, v4 +; GFX942-NEXT: v_mov_b32_e32 v9, v5 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -7039,21 +7039,21 @@ define void @v_shuffle_v4p0_v3p0__5_u_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -7104,27 +7104,27 @@ define void @v_shuffle_v4p0_v3p0__5_0_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -7171,25 +7171,25 @@ define void @v_shuffle_v4p0_v3p0__5_1_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -7236,24 +7236,24 @@ define void @v_shuffle_v4p0_v3p0__5_2_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -7296,22 +7296,22 @@ define void @v_shuffle_v4p0_v3p0__5_3_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -7356,23 +7356,23 @@ define void @v_shuffle_v4p0_v3p0__5_4_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v8, v4 -; GFX940-NEXT: v_mov_b32_e32 v9, v5 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v8, v4 +; GFX942-NEXT: v_mov_b32_e32 v9, v5 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -7411,20 +7411,20 @@ define void @v_shuffle_v4p0_v3p0__5_5_u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -7471,25 +7471,25 @@ define void @v_shuffle_v4p0_v3p0__5_5_0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -7536,25 +7536,25 @@ define void @v_shuffle_v4p0_v3p0__5_5_1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v8 -; GFX940-NEXT: v_mov_b32_e32 v5, v9 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v8 +; GFX942-NEXT: v_mov_b32_e32 v5, v9 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -7603,26 +7603,26 @@ define void @v_shuffle_v4p0_v3p0__5_5_2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v4 -; GFX940-NEXT: v_mov_b32_e32 v9, v5 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v4 +; GFX942-NEXT: v_mov_b32_e32 v9, v5 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -7659,19 +7659,19 @@ define void @v_shuffle_v4p0_v3p0__5_5_3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -7710,20 +7710,20 @@ define void @v_shuffle_v4p0_v3p0__5_5_4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v3p0__5_5_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=v"() %vec1 = call <3 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -7768,17 +7768,17 @@ define void @s_shuffle_v4p0_v3p0__0_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -7812,18 +7812,18 @@ define void @s_shuffle_v4p0_v3p0__1_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -7853,18 +7853,18 @@ define void @s_shuffle_v4p0_v3p0__2_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__2_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__2_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -7912,18 +7912,18 @@ define void @s_shuffle_v4p0_v3p0__4_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__4_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__4_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -7954,18 +7954,18 @@ define void @s_shuffle_v4p0_v3p0__5_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -8010,21 +8010,21 @@ define void @s_shuffle_v4p0_v3p0__5_0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -8061,21 +8061,21 @@ define void @s_shuffle_v4p0_v3p0__5_1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -8116,23 +8116,23 @@ define void @s_shuffle_v4p0_v3p0__5_2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -8167,20 +8167,20 @@ define void @s_shuffle_v4p0_v3p0__5_3_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -8239,20 +8239,20 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -8301,25 +8301,25 @@ define void @s_shuffle_v4p0_v3p0__5_5_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -8368,25 +8368,25 @@ define void @s_shuffle_v4p0_v3p0__5_5_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -8431,23 +8431,23 @@ define void @s_shuffle_v4p0_v3p0__5_5_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -8486,22 +8486,22 @@ define void @s_shuffle_v4p0_v3p0__5_5_3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -8544,22 +8544,22 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -8630,25 +8630,25 @@ define void @s_shuffle_v4p0_v3p0__5_5_5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -8697,25 +8697,25 @@ define void @s_shuffle_v4p0_v3p0__5_5_5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -8764,25 +8764,25 @@ define void @s_shuffle_v4p0_v3p0__5_5_5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -8829,24 +8829,24 @@ define void @s_shuffle_v4p0_v3p0__5_5_5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -8889,24 +8889,24 @@ define void @s_shuffle_v4p0_v3p0__5_5_5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -8973,22 +8973,22 @@ define void @s_shuffle_v4p0_v3p0__u_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -9057,24 +9057,24 @@ define void @s_shuffle_v4p0_v3p0__1_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -9116,24 +9116,24 @@ define void @s_shuffle_v4p0_v3p0__2_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -9175,22 +9175,22 @@ define void @s_shuffle_v4p0_v3p0__3_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -9242,27 +9242,27 @@ define void @s_shuffle_v4p0_v3p0__4_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__4_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__4_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -9315,25 +9315,25 @@ define void @s_shuffle_v4p0_v3p0__5_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -9382,23 +9382,23 @@ define void @s_shuffle_v4p0_v3p0__5_u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -9451,25 +9451,25 @@ define void @s_shuffle_v4p0_v3p0__5_1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -9518,27 +9518,27 @@ define void @s_shuffle_v4p0_v3p0__5_2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -9591,25 +9591,25 @@ define void @s_shuffle_v4p0_v3p0__5_3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -9658,25 +9658,25 @@ define void @s_shuffle_v4p0_v3p0__5_4_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -9729,27 +9729,27 @@ define void @s_shuffle_v4p0_v3p0__5_5_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -9798,25 +9798,25 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -9869,27 +9869,27 @@ define void @s_shuffle_v4p0_v3p0__5_5_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -9942,27 +9942,27 @@ define void @s_shuffle_v4p0_v3p0__5_5_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -10011,25 +10011,25 @@ define void @s_shuffle_v4p0_v3p0__5_5_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s16 -; GFX940-NEXT: s_mov_b32 s9, s17 -; GFX940-NEXT: s_mov_b32 s10, s16 -; GFX940-NEXT: s_mov_b32 s11, s17 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s16 +; GFX942-NEXT: s_mov_b32 s9, s17 +; GFX942-NEXT: s_mov_b32 s10, s16 +; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -10082,27 +10082,27 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s16 -; GFX940-NEXT: s_mov_b32 s9, s17 -; GFX940-NEXT: s_mov_b32 s10, s16 -; GFX940-NEXT: s_mov_b32 s11, s17 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s16 +; GFX942-NEXT: s_mov_b32 s9, s17 +; GFX942-NEXT: s_mov_b32 s10, s16 +; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -10260,25 +10260,25 @@ define void @s_shuffle_v4p0_v3p0__4_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__4_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: s_mov_b32 s14, s10 -; GFX940-NEXT: s_mov_b32 s15, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__4_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b32 s14, s10 +; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -10323,25 +10323,25 @@ define void @s_shuffle_v4p0_v3p0__5_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: s_mov_b32 s14, s10 -; GFX940-NEXT: s_mov_b32 s15, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b32 s14, s10 +; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -10390,23 +10390,23 @@ define void @s_shuffle_v4p0_v3p0__5_u_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -10459,25 +10459,25 @@ define void @s_shuffle_v4p0_v3p0__5_0_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -10522,27 +10522,27 @@ define void @s_shuffle_v4p0_v3p0__5_2_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -10595,25 +10595,25 @@ define void @s_shuffle_v4p0_v3p0__5_3_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -10662,25 +10662,25 @@ define void @s_shuffle_v4p0_v3p0__5_4_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -10733,27 +10733,27 @@ define void @s_shuffle_v4p0_v3p0__5_5_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -10802,25 +10802,25 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -10873,27 +10873,27 @@ define void @s_shuffle_v4p0_v3p0__5_5_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -10942,27 +10942,27 @@ define void @s_shuffle_v4p0_v3p0__5_5_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -11011,25 +11011,25 @@ define void @s_shuffle_v4p0_v3p0__5_5_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s16 -; GFX940-NEXT: s_mov_b32 s9, s17 -; GFX940-NEXT: s_mov_b32 s10, s16 -; GFX940-NEXT: s_mov_b32 s11, s17 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s16 +; GFX942-NEXT: s_mov_b32 s9, s17 +; GFX942-NEXT: s_mov_b32 s10, s16 +; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -11082,27 +11082,27 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s16 -; GFX940-NEXT: s_mov_b32 s9, s17 -; GFX940-NEXT: s_mov_b32 s10, s16 -; GFX940-NEXT: s_mov_b32 s11, s17 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s16 +; GFX942-NEXT: s_mov_b32 s9, s17 +; GFX942-NEXT: s_mov_b32 s10, s16 +; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -11260,25 +11260,25 @@ define void @s_shuffle_v4p0_v3p0__4_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__4_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__4_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -11323,25 +11323,25 @@ define void @s_shuffle_v4p0_v3p0__5_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -11382,23 +11382,23 @@ define void @s_shuffle_v4p0_v3p0__5_u_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -11447,27 +11447,27 @@ define void @s_shuffle_v4p0_v3p0__5_0_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -11508,23 +11508,23 @@ define void @s_shuffle_v4p0_v3p0__5_1_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -11569,25 +11569,25 @@ define void @s_shuffle_v4p0_v3p0__5_3_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -11636,25 +11636,25 @@ define void @s_shuffle_v4p0_v3p0__5_4_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -11703,25 +11703,25 @@ define void @s_shuffle_v4p0_v3p0__5_5_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -11770,25 +11770,25 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -11837,27 +11837,27 @@ define void @s_shuffle_v4p0_v3p0__5_5_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -11910,27 +11910,27 @@ define void @s_shuffle_v4p0_v3p0__5_5_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -11983,25 +11983,25 @@ define void @s_shuffle_v4p0_v3p0__5_5_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s16 -; GFX940-NEXT: s_mov_b32 s9, s17 -; GFX940-NEXT: s_mov_b32 s10, s16 -; GFX940-NEXT: s_mov_b32 s11, s17 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s16 +; GFX942-NEXT: s_mov_b32 s9, s17 +; GFX942-NEXT: s_mov_b32 s10, s16 +; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -12054,27 +12054,27 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s16 -; GFX940-NEXT: s_mov_b32 s9, s17 -; GFX940-NEXT: s_mov_b32 s10, s16 -; GFX940-NEXT: s_mov_b32 s11, s17 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s16 +; GFX942-NEXT: s_mov_b32 s9, s17 +; GFX942-NEXT: s_mov_b32 s10, s16 +; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -12119,17 +12119,17 @@ define void @s_shuffle_v4p0_v3p0__0_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__0_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -12163,18 +12163,18 @@ define void @s_shuffle_v4p0_v3p0__1_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -12204,18 +12204,18 @@ define void @s_shuffle_v4p0_v3p0__2_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__2_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -12275,24 +12275,24 @@ define void @s_shuffle_v4p0_v3p0__4_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__4_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__4_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -12335,24 +12335,24 @@ define void @s_shuffle_v4p0_v3p0__5_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -12391,22 +12391,22 @@ define void @s_shuffle_v4p0_v3p0__5_u_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -12459,25 +12459,25 @@ define void @s_shuffle_v4p0_v3p0__5_0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -12522,25 +12522,25 @@ define void @s_shuffle_v4p0_v3p0__5_1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -12589,27 +12589,27 @@ define void @s_shuffle_v4p0_v3p0__5_2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:21] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s20 -; GFX940-NEXT: s_mov_b32 s9, s21 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s16 -; GFX940-NEXT: s_mov_b32 s13, s17 -; GFX940-NEXT: s_mov_b32 s14, s16 -; GFX940-NEXT: s_mov_b32 s15, s17 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:21] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s20 +; GFX942-NEXT: s_mov_b32 s9, s21 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s16 +; GFX942-NEXT: s_mov_b32 s13, s17 +; GFX942-NEXT: s_mov_b32 s14, s16 +; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -12652,24 +12652,24 @@ define void @s_shuffle_v4p0_v3p0__5_4_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -12716,24 +12716,24 @@ define void @s_shuffle_v4p0_v3p0__5_5_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -12776,22 +12776,22 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -12844,27 +12844,27 @@ define void @s_shuffle_v4p0_v3p0__5_5_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:21] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s20 -; GFX940-NEXT: s_mov_b32 s9, s21 -; GFX940-NEXT: s_mov_b32 s10, s20 -; GFX940-NEXT: s_mov_b32 s11, s21 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s16 -; GFX940-NEXT: s_mov_b32 s15, s17 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:21] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s20 +; GFX942-NEXT: s_mov_b32 s9, s21 +; GFX942-NEXT: s_mov_b32 s10, s20 +; GFX942-NEXT: s_mov_b32 s11, s21 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s16 +; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -12917,27 +12917,27 @@ define void @s_shuffle_v4p0_v3p0__5_5_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:21] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s20 -; GFX940-NEXT: s_mov_b32 s9, s21 -; GFX940-NEXT: s_mov_b32 s10, s20 -; GFX940-NEXT: s_mov_b32 s11, s21 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s16 -; GFX940-NEXT: s_mov_b32 s15, s17 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:21] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s20 +; GFX942-NEXT: s_mov_b32 s9, s21 +; GFX942-NEXT: s_mov_b32 s10, s20 +; GFX942-NEXT: s_mov_b32 s11, s21 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s16 +; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -12986,25 +12986,25 @@ define void @s_shuffle_v4p0_v3p0__5_5_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -13051,24 +13051,24 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -13135,25 +13135,25 @@ define void @s_shuffle_v4p0_v3p0__0_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__0_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__0_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -13202,25 +13202,25 @@ define void @s_shuffle_v4p0_v3p0__1_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__1_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: s_mov_b32 s14, s10 -; GFX940-NEXT: s_mov_b32 s15, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__1_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b32 s14, s10 +; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -13269,25 +13269,25 @@ define void @s_shuffle_v4p0_v3p0__2_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__2_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: s_mov_b32 s14, s10 -; GFX940-NEXT: s_mov_b32 s15, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__2_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b32 s14, s10 +; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -13396,22 +13396,22 @@ define void @s_shuffle_v4p0_v3p0__5_u_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -13460,25 +13460,25 @@ define void @s_shuffle_v4p0_v3p0__5_0_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: s_mov_b32 s14, s6 -; GFX940-NEXT: s_mov_b32 s15, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b32 s14, s6 +; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -13523,25 +13523,25 @@ define void @s_shuffle_v4p0_v3p0__5_1_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -13590,25 +13590,25 @@ define void @s_shuffle_v4p0_v3p0__5_2_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s16 -; GFX940-NEXT: s_mov_b32 s9, s17 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s16 +; GFX942-NEXT: s_mov_b32 s9, s17 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -13651,24 +13651,24 @@ define void @s_shuffle_v4p0_v3p0__5_3_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -13711,24 +13711,24 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -13767,22 +13767,22 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -13831,25 +13831,25 @@ define void @s_shuffle_v4p0_v3p0__5_5_0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s16 -; GFX940-NEXT: s_mov_b32 s9, s17 -; GFX940-NEXT: s_mov_b32 s10, s16 -; GFX940-NEXT: s_mov_b32 s11, s17 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s16 +; GFX942-NEXT: s_mov_b32 s9, s17 +; GFX942-NEXT: s_mov_b32 s10, s16 +; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -13898,25 +13898,25 @@ define void @s_shuffle_v4p0_v3p0__5_5_1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s16 -; GFX940-NEXT: s_mov_b32 s9, s17 -; GFX940-NEXT: s_mov_b32 s10, s16 -; GFX940-NEXT: s_mov_b32 s11, s17 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s16 +; GFX942-NEXT: s_mov_b32 s9, s17 +; GFX942-NEXT: s_mov_b32 s10, s16 +; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -13965,25 +13965,25 @@ define void @s_shuffle_v4p0_v3p0__5_5_2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -14022,24 +14022,24 @@ define void @s_shuffle_v4p0_v3p0__5_5_3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -14110,25 +14110,25 @@ define void @s_shuffle_v4p0_v3p0__0_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__0_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__0_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -14177,25 +14177,25 @@ define void @s_shuffle_v4p0_v3p0__1_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__1_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__1_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -14244,25 +14244,25 @@ define void @s_shuffle_v4p0_v3p0__2_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__2_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__2_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -14379,25 +14379,25 @@ define void @s_shuffle_v4p0_v3p0__5_0_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -14446,25 +14446,25 @@ define void @s_shuffle_v4p0_v3p0__5_1_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -14513,25 +14513,25 @@ define void @s_shuffle_v4p0_v3p0__5_2_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s12 -; GFX940-NEXT: s_mov_b32 s9, s13 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s12 +; GFX942-NEXT: s_mov_b32 s9, s13 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -14578,24 +14578,24 @@ define void @s_shuffle_v4p0_v3p0__5_3_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -14660,22 +14660,22 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -14728,27 +14728,27 @@ define void @s_shuffle_v4p0_v3p0__5_5_0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s16 -; GFX940-NEXT: s_mov_b32 s9, s17 -; GFX940-NEXT: s_mov_b32 s10, s16 -; GFX940-NEXT: s_mov_b32 s11, s17 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s16 -; GFX940-NEXT: s_mov_b32 s15, s17 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s16 +; GFX942-NEXT: s_mov_b32 s9, s17 +; GFX942-NEXT: s_mov_b32 s10, s16 +; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s16 +; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -14801,27 +14801,27 @@ define void @s_shuffle_v4p0_v3p0__5_5_1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:17] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s16 -; GFX940-NEXT: s_mov_b32 s9, s17 -; GFX940-NEXT: s_mov_b32 s10, s16 -; GFX940-NEXT: s_mov_b32 s11, s17 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s16 -; GFX940-NEXT: s_mov_b32 s15, s17 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:17] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s16 +; GFX942-NEXT: s_mov_b32 s9, s17 +; GFX942-NEXT: s_mov_b32 s10, s16 +; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s16 +; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -14870,25 +14870,25 @@ define void @s_shuffle_v4p0_v3p0__5_5_2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -14931,24 +14931,24 @@ define void @s_shuffle_v4p0_v3p0__5_5_3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -14995,24 +14995,24 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v3p0__5_5_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v4p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v4p0.ll index 8ce765abf5e82..ce1c54129f706 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v4p0.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v4p0.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v4p0_v4p0__u_u_u_u(ptr addrspace(1) inreg %ptr) { @@ -40,17 +40,17 @@ define void @v_shuffle_v4p0_v4p0__0_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -80,16 +80,16 @@ define void @v_shuffle_v4p0_v4p0__1_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -119,16 +119,16 @@ define void @v_shuffle_v4p0_v4p0__2_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__2_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__2_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -162,18 +162,18 @@ define void @v_shuffle_v4p0_v4p0__3_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__3_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__3_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -214,16 +214,16 @@ define void @v_shuffle_v4p0_v4p0__5_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__5_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__5_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -254,16 +254,16 @@ define void @v_shuffle_v4p0_v4p0__6_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__6_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__6_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -298,18 +298,18 @@ define void @v_shuffle_v4p0_v4p0__7_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -354,24 +354,24 @@ define void @v_shuffle_v4p0_v4p0__7_0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -412,22 +412,22 @@ define void @v_shuffle_v4p0_v4p0__7_1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -468,22 +468,22 @@ define void @v_shuffle_v4p0_v4p0__7_2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -524,22 +524,22 @@ define void @v_shuffle_v4p0_v4p0__7_3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -578,20 +578,20 @@ define void @v_shuffle_v4p0_v4p0__7_4_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -626,18 +626,18 @@ define void @v_shuffle_v4p0_v4p0__7_5_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -672,18 +672,18 @@ define void @v_shuffle_v4p0_v4p0__7_6_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_6_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_6_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -718,18 +718,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -772,22 +772,22 @@ define void @v_shuffle_v4p0_v4p0__7_7_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -830,22 +830,22 @@ define void @v_shuffle_v4p0_v4p0__7_7_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -888,22 +888,22 @@ define void @v_shuffle_v4p0_v4p0__7_7_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -950,24 +950,24 @@ define void @v_shuffle_v4p0_v4p0__7_7_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -1004,19 +1004,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -1055,20 +1055,20 @@ define void @v_shuffle_v4p0_v4p0__7_7_5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -1107,20 +1107,20 @@ define void @v_shuffle_v4p0_v4p0__7_7_6_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -1161,21 +1161,21 @@ define void @v_shuffle_v4p0_v4p0__7_7_7_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -1226,27 +1226,27 @@ define void @v_shuffle_v4p0_v4p0__7_7_7_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -1293,25 +1293,25 @@ define void @v_shuffle_v4p0_v4p0__7_7_7_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -1358,25 +1358,25 @@ define void @v_shuffle_v4p0_v4p0__7_7_7_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -1423,24 +1423,24 @@ define void @v_shuffle_v4p0_v4p0__7_7_7_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -1487,24 +1487,24 @@ define void @v_shuffle_v4p0_v4p0__7_7_7_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -1545,21 +1545,21 @@ define void @v_shuffle_v4p0_v4p0__7_7_7_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -1602,22 +1602,22 @@ define void @v_shuffle_v4p0_v4p0__7_7_7_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -1654,19 +1654,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -1703,19 +1703,19 @@ define void @v_shuffle_v4p0_v4p0__u_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1751,19 +1751,19 @@ define void @v_shuffle_v4p0_v4p0__0_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__0_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> zeroinitializer store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1803,21 +1803,21 @@ define void @v_shuffle_v4p0_v4p0__1_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1857,21 +1857,21 @@ define void @v_shuffle_v4p0_v4p0__2_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1915,23 +1915,23 @@ define void @v_shuffle_v4p0_v4p0__3_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -1967,19 +1967,19 @@ define void @v_shuffle_v4p0_v4p0__4_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__4_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__4_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -2025,25 +2025,25 @@ define void @v_shuffle_v4p0_v4p0__5_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__5_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__5_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -2090,25 +2090,25 @@ define void @v_shuffle_v4p0_v4p0__6_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__6_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v8, v0 -; GFX940-NEXT: v_mov_b32_e32 v9, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__6_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v8, v0 +; GFX942-NEXT: v_mov_b32_e32 v9, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -2159,27 +2159,27 @@ define void @v_shuffle_v4p0_v4p0__7_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -2228,26 +2228,26 @@ define void @v_shuffle_v4p0_v4p0__7_u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -2299,27 +2299,27 @@ define void @v_shuffle_v4p0_v4p0__7_1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -2368,25 +2368,25 @@ define void @v_shuffle_v4p0_v4p0__7_2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -2433,24 +2433,24 @@ define void @v_shuffle_v4p0_v4p0__7_3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -2503,27 +2503,27 @@ define void @v_shuffle_v4p0_v4p0__7_4_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -2572,26 +2572,26 @@ define void @v_shuffle_v4p0_v4p0__7_5_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -2638,25 +2638,25 @@ define void @v_shuffle_v4p0_v4p0__7_6_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_6_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v8 -; GFX940-NEXT: v_mov_b32_e32 v5, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_6_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v8 +; GFX942-NEXT: v_mov_b32_e32 v5, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -2703,25 +2703,25 @@ define void @v_shuffle_v4p0_v4p0__7_7_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -2768,25 +2768,25 @@ define void @v_shuffle_v4p0_v4p0__7_7_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -2833,25 +2833,25 @@ define void @v_shuffle_v4p0_v4p0__7_7_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -2898,25 +2898,25 @@ define void @v_shuffle_v4p0_v4p0__7_7_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -2967,26 +2967,26 @@ define void @v_shuffle_v4p0_v4p0__7_7_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -3033,25 +3033,25 @@ define void @v_shuffle_v4p0_v4p0__7_7_4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -3100,26 +3100,26 @@ define void @v_shuffle_v4p0_v4p0__7_7_5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -3170,27 +3170,27 @@ define void @v_shuffle_v4p0_v4p0__7_7_6_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -3227,19 +3227,19 @@ define void @v_shuffle_v4p0_v4p0__u_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__u_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__u_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3275,19 +3275,19 @@ define void @v_shuffle_v4p0_v4p0__0_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__0_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__0_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3323,19 +3323,19 @@ define void @v_shuffle_v4p0_v4p0__1_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__1_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__1_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3375,21 +3375,21 @@ define void @v_shuffle_v4p0_v4p0__2_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__2_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__2_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3429,21 +3429,21 @@ define void @v_shuffle_v4p0_v4p0__3_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__3_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3479,19 +3479,19 @@ define void @v_shuffle_v4p0_v4p0__4_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__4_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__4_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -3537,25 +3537,25 @@ define void @v_shuffle_v4p0_v4p0__5_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__5_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v8, v2 -; GFX940-NEXT: v_mov_b32_e32 v9, v3 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__5_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v8, v2 +; GFX942-NEXT: v_mov_b32_e32 v9, v3 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -3602,25 +3602,25 @@ define void @v_shuffle_v4p0_v4p0__6_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__6_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v10, v2 -; GFX940-NEXT: v_mov_b32_e32 v11, v3 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__6_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v10, v2 +; GFX942-NEXT: v_mov_b32_e32 v11, v3 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -3667,25 +3667,25 @@ define void @v_shuffle_v4p0_v4p0__7_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -3732,25 +3732,25 @@ define void @v_shuffle_v4p0_v4p0__7_u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -3802,28 +3802,28 @@ define void @v_shuffle_v4p0_v4p0__7_0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -3872,25 +3872,25 @@ define void @v_shuffle_v4p0_v4p0__7_2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -3939,25 +3939,25 @@ define void @v_shuffle_v4p0_v4p0__7_3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -4008,26 +4008,26 @@ define void @v_shuffle_v4p0_v4p0__7_4_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v0, v12 -; GFX940-NEXT: v_mov_b32_e32 v1, v13 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v0, v12 +; GFX942-NEXT: v_mov_b32_e32 v1, v13 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -4076,26 +4076,26 @@ define void @v_shuffle_v4p0_v4p0__7_5_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v10 -; GFX940-NEXT: v_mov_b32_e32 v5, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v10 +; GFX942-NEXT: v_mov_b32_e32 v5, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -4142,25 +4142,25 @@ define void @v_shuffle_v4p0_v4p0__7_6_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_6_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v10 -; GFX940-NEXT: v_mov_b32_e32 v7, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_6_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v10 +; GFX942-NEXT: v_mov_b32_e32 v7, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -4207,25 +4207,25 @@ define void @v_shuffle_v4p0_v4p0__7_7_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -4268,22 +4268,22 @@ define void @v_shuffle_v4p0_v4p0__7_7_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -4326,22 +4326,22 @@ define void @v_shuffle_v4p0_v4p0__7_7_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -4388,25 +4388,25 @@ define void @v_shuffle_v4p0_v4p0__7_7_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -4453,24 +4453,24 @@ define void @v_shuffle_v4p0_v4p0__7_7_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -4517,25 +4517,25 @@ define void @v_shuffle_v4p0_v4p0__7_7_4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -4584,26 +4584,26 @@ define void @v_shuffle_v4p0_v4p0__7_7_5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v2 -; GFX940-NEXT: v_mov_b32_e32 v9, v3 -; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v2 +; GFX942-NEXT: v_mov_b32_e32 v9, v3 +; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -4650,25 +4650,25 @@ define void @v_shuffle_v4p0_v4p0__7_7_6_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v9 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -4705,19 +4705,19 @@ define void @v_shuffle_v4p0_v4p0__u_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__u_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__u_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -4757,21 +4757,21 @@ define void @v_shuffle_v4p0_v4p0__0_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__0_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -4807,19 +4807,19 @@ define void @v_shuffle_v4p0_v4p0__1_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__1_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -4855,19 +4855,19 @@ define void @v_shuffle_v4p0_v4p0__2_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__2_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__2_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -4909,22 +4909,22 @@ define void @v_shuffle_v4p0_v4p0__3_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__3_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__3_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -4960,19 +4960,19 @@ define void @v_shuffle_v4p0_v4p0__4_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__4_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__4_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -5018,25 +5018,25 @@ define void @v_shuffle_v4p0_v4p0__5_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__5_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v10, v4 -; GFX940-NEXT: v_mov_b32_e32 v11, v5 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__5_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v10, v4 +; GFX942-NEXT: v_mov_b32_e32 v11, v5 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -5083,25 +5083,25 @@ define void @v_shuffle_v4p0_v4p0__6_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__6_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v12, v4 -; GFX940-NEXT: v_mov_b32_e32 v13, v5 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__6_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v12, v4 +; GFX942-NEXT: v_mov_b32_e32 v13, v5 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -5148,25 +5148,25 @@ define void @v_shuffle_v4p0_v4p0__7_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -5213,25 +5213,25 @@ define void @v_shuffle_v4p0_v4p0__7_u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v0, v12 -; GFX940-NEXT: v_mov_b32_e32 v1, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v0, v12 +; GFX942-NEXT: v_mov_b32_e32 v1, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -5282,27 +5282,27 @@ define void @v_shuffle_v4p0_v4p0__7_0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -5349,25 +5349,25 @@ define void @v_shuffle_v4p0_v4p0__7_1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v0, v12 -; GFX940-NEXT: v_mov_b32_e32 v1, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v0, v12 +; GFX942-NEXT: v_mov_b32_e32 v1, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -5416,25 +5416,25 @@ define void @v_shuffle_v4p0_v4p0__7_3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -5485,26 +5485,26 @@ define void @v_shuffle_v4p0_v4p0__7_4_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v0, v14 -; GFX940-NEXT: v_mov_b32_e32 v1, v15 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v0, v14 +; GFX942-NEXT: v_mov_b32_e32 v1, v15 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -5553,26 +5553,26 @@ define void @v_shuffle_v4p0_v4p0__7_5_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v6, v12 -; GFX940-NEXT: v_mov_b32_e32 v7, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v6, v12 +; GFX942-NEXT: v_mov_b32_e32 v7, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -5619,25 +5619,25 @@ define void @v_shuffle_v4p0_v4p0__7_6_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_6_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v8, v12 -; GFX940-NEXT: v_mov_b32_e32 v9, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_6_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v8, v12 +; GFX942-NEXT: v_mov_b32_e32 v9, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -5684,25 +5684,25 @@ define void @v_shuffle_v4p0_v4p0__7_7_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -5745,22 +5745,22 @@ define void @v_shuffle_v4p0_v4p0__7_7_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -5807,24 +5807,24 @@ define void @v_shuffle_v4p0_v4p0__7_7_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -5867,22 +5867,22 @@ define void @v_shuffle_v4p0_v4p0__7_7_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -5929,24 +5929,24 @@ define void @v_shuffle_v4p0_v4p0__7_7_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -5993,25 +5993,25 @@ define void @v_shuffle_v4p0_v4p0__7_7_4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v4 -; GFX940-NEXT: v_mov_b32_e32 v9, v5 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v4 +; GFX942-NEXT: v_mov_b32_e32 v9, v5 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -6060,26 +6060,26 @@ define void @v_shuffle_v4p0_v4p0__7_7_5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v4 -; GFX940-NEXT: v_mov_b32_e32 v11, v5 -; GFX940-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v4 +; GFX942-NEXT: v_mov_b32_e32 v11, v5 +; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -6126,25 +6126,25 @@ define void @v_shuffle_v4p0_v4p0__7_7_6_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v10 -; GFX940-NEXT: v_mov_b32_e32 v3, v11 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v10 +; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -6181,19 +6181,19 @@ define void @v_shuffle_v4p0_v4p0__u_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__u_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__u_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -6233,21 +6233,21 @@ define void @v_shuffle_v4p0_v4p0__0_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__0_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -6283,19 +6283,19 @@ define void @v_shuffle_v4p0_v4p0__1_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -6335,21 +6335,21 @@ define void @v_shuffle_v4p0_v4p0__2_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__2_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -6385,19 +6385,19 @@ define void @v_shuffle_v4p0_v4p0__3_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__3_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__3_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -6433,19 +6433,19 @@ define void @v_shuffle_v4p0_v4p0__4_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__4_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__4_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -6491,24 +6491,24 @@ define void @v_shuffle_v4p0_v4p0__5_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__5_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v12, v6 -; GFX940-NEXT: v_mov_b32_e32 v13, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__5_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v12, v6 +; GFX942-NEXT: v_mov_b32_e32 v13, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -6555,24 +6555,24 @@ define void @v_shuffle_v4p0_v4p0__6_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__6_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v14, v6 -; GFX940-NEXT: v_mov_b32_e32 v15, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__6_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v14, v6 +; GFX942-NEXT: v_mov_b32_e32 v15, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -6621,25 +6621,25 @@ define void @v_shuffle_v4p0_v4p0__7_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -6686,24 +6686,24 @@ define void @v_shuffle_v4p0_v4p0__7_u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v0, v14 -; GFX940-NEXT: v_mov_b32_e32 v1, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v0, v14 +; GFX942-NEXT: v_mov_b32_e32 v1, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -6754,26 +6754,26 @@ define void @v_shuffle_v4p0_v4p0__7_0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v14 -; GFX940-NEXT: v_mov_b32_e32 v3, v15 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v14 +; GFX942-NEXT: v_mov_b32_e32 v3, v15 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -6820,24 +6820,24 @@ define void @v_shuffle_v4p0_v4p0__7_1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v0, v14 -; GFX940-NEXT: v_mov_b32_e32 v1, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v0, v14 +; GFX942-NEXT: v_mov_b32_e32 v1, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -6890,27 +6890,27 @@ define void @v_shuffle_v4p0_v4p0__7_2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v14 -; GFX940-NEXT: v_mov_b32_e32 v3, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v14 +; GFX942-NEXT: v_mov_b32_e32 v3, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -6961,26 +6961,26 @@ define void @v_shuffle_v4p0_v4p0__7_4_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v0, v14 -; GFX940-NEXT: v_mov_b32_e32 v1, v15 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v0, v14 +; GFX942-NEXT: v_mov_b32_e32 v1, v15 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -7027,24 +7027,24 @@ define void @v_shuffle_v4p0_v4p0__7_5_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v8, v14 -; GFX940-NEXT: v_mov_b32_e32 v9, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v8, v14 +; GFX942-NEXT: v_mov_b32_e32 v9, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -7091,24 +7091,24 @@ define void @v_shuffle_v4p0_v4p0__7_6_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_6_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v10, v14 -; GFX940-NEXT: v_mov_b32_e32 v11, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_6_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v10, v14 +; GFX942-NEXT: v_mov_b32_e32 v11, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -7155,24 +7155,24 @@ define void @v_shuffle_v4p0_v4p0__7_7_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -7215,22 +7215,22 @@ define void @v_shuffle_v4p0_v4p0__7_7_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -7277,24 +7277,24 @@ define void @v_shuffle_v4p0_v4p0__7_7_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -7341,24 +7341,24 @@ define void @v_shuffle_v4p0_v4p0__7_7_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -7401,22 +7401,22 @@ define void @v_shuffle_v4p0_v4p0__7_7_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -7463,24 +7463,24 @@ define void @v_shuffle_v4p0_v4p0__7_7_4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v10, v6 -; GFX940-NEXT: v_mov_b32_e32 v11, v7 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v10, v6 +; GFX942-NEXT: v_mov_b32_e32 v11, v7 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -7529,26 +7529,26 @@ define void @v_shuffle_v4p0_v4p0__7_7_5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v6 -; GFX940-NEXT: v_mov_b32_e32 v13, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v6 +; GFX942-NEXT: v_mov_b32_e32 v13, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -7595,24 +7595,24 @@ define void @v_shuffle_v4p0_v4p0__7_7_6_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v12 -; GFX940-NEXT: v_mov_b32_e32 v5, v13 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v12 +; GFX942-NEXT: v_mov_b32_e32 v5, v13 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -7656,17 +7656,17 @@ define void @v_shuffle_v4p0_v4p0__0_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__0_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__0_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -7696,16 +7696,16 @@ define void @v_shuffle_v4p0_v4p0__1_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__1_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__1_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -7735,16 +7735,16 @@ define void @v_shuffle_v4p0_v4p0__2_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__2_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__2_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -7778,18 +7778,18 @@ define void @v_shuffle_v4p0_v4p0__3_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__3_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__3_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32 @@ -7840,21 +7840,21 @@ define void @v_shuffle_v4p0_v4p0__5_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__5_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__5_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -7895,21 +7895,21 @@ define void @v_shuffle_v4p0_v4p0__6_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__6_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__6_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -7954,23 +7954,23 @@ define void @v_shuffle_v4p0_v4p0__7_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -8013,22 +8013,22 @@ define void @v_shuffle_v4p0_v4p0__7_u_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -8080,28 +8080,28 @@ define void @v_shuffle_v4p0_v4p0__7_0_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -8148,25 +8148,25 @@ define void @v_shuffle_v4p0_v4p0__7_1_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -8213,25 +8213,25 @@ define void @v_shuffle_v4p0_v4p0__7_2_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v6 -; GFX940-NEXT: v_mov_b32_e32 v9, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v6 +; GFX942-NEXT: v_mov_b32_e32 v9, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -8278,24 +8278,24 @@ define void @v_shuffle_v4p0_v4p0__7_3_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v8 -; GFX940-NEXT: v_mov_b32_e32 v11, v9 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v8 +; GFX942-NEXT: v_mov_b32_e32 v11, v9 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -8340,23 +8340,23 @@ define void @v_shuffle_v4p0_v4p0__7_5_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v0 -; GFX940-NEXT: v_mov_b32_e32 v9, v1 -; GFX940-NEXT: v_mov_b32_e32 v10, v0 -; GFX940-NEXT: v_mov_b32_e32 v11, v1 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v0 +; GFX942-NEXT: v_mov_b32_e32 v9, v1 +; GFX942-NEXT: v_mov_b32_e32 v10, v0 +; GFX942-NEXT: v_mov_b32_e32 v11, v1 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -8399,22 +8399,22 @@ define void @v_shuffle_v4p0_v4p0__7_6_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_6_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_6_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -8455,21 +8455,21 @@ define void @v_shuffle_v4p0_v4p0__7_7_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -8510,21 +8510,21 @@ define void @v_shuffle_v4p0_v4p0__7_7_u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -8567,22 +8567,22 @@ define void @v_shuffle_v4p0_v4p0__7_7_0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -8625,22 +8625,22 @@ define void @v_shuffle_v4p0_v4p0__7_7_1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -8683,22 +8683,22 @@ define void @v_shuffle_v4p0_v4p0__7_7_2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -8749,26 +8749,26 @@ define void @v_shuffle_v4p0_v4p0__7_7_3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -8811,22 +8811,22 @@ define void @v_shuffle_v4p0_v4p0__7_7_5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -8873,24 +8873,24 @@ define void @v_shuffle_v4p0_v4p0__7_7_6_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -8927,19 +8927,19 @@ define void @v_shuffle_v4p0_v4p0__u_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__u_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__u_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -8986,25 +8986,25 @@ define void @v_shuffle_v4p0_v4p0__0_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__0_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__0_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -9051,25 +9051,25 @@ define void @v_shuffle_v4p0_v4p0__1_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__1_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v6 -; GFX940-NEXT: v_mov_b32_e32 v9, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__1_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v6 +; GFX942-NEXT: v_mov_b32_e32 v9, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -9116,25 +9116,25 @@ define void @v_shuffle_v4p0_v4p0__2_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__2_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v8 -; GFX940-NEXT: v_mov_b32_e32 v11, v9 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__2_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v8 +; GFX942-NEXT: v_mov_b32_e32 v11, v9 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -9181,24 +9181,24 @@ define void @v_shuffle_v4p0_v4p0__3_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__3_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, v10 -; GFX940-NEXT: v_mov_b32_e32 v13, v11 -; GFX940-NEXT: v_mov_b32_e32 v8, v6 -; GFX940-NEXT: v_mov_b32_e32 v9, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__3_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, v10 +; GFX942-NEXT: v_mov_b32_e32 v13, v11 +; GFX942-NEXT: v_mov_b32_e32 v8, v6 +; GFX942-NEXT: v_mov_b32_e32 v9, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -9235,19 +9235,19 @@ define void @v_shuffle_v4p0_v4p0__4_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__4_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__4_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -9284,19 +9284,19 @@ define void @v_shuffle_v4p0_v4p0__5_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__5_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__5_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -9337,21 +9337,21 @@ define void @v_shuffle_v4p0_v4p0__6_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__6_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__6_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -9392,21 +9392,21 @@ define void @v_shuffle_v4p0_v4p0__7_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -9447,21 +9447,21 @@ define void @v_shuffle_v4p0_v4p0__7_u_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -9512,27 +9512,27 @@ define void @v_shuffle_v4p0_v4p0__7_0_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -9579,25 +9579,25 @@ define void @v_shuffle_v4p0_v4p0__7_1_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v6 -; GFX940-NEXT: v_mov_b32_e32 v9, v7 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v6 +; GFX942-NEXT: v_mov_b32_e32 v9, v7 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -9644,25 +9644,25 @@ define void @v_shuffle_v4p0_v4p0__7_2_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v8 -; GFX940-NEXT: v_mov_b32_e32 v11, v9 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v8 +; GFX942-NEXT: v_mov_b32_e32 v11, v9 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -9709,24 +9709,24 @@ define void @v_shuffle_v4p0_v4p0__7_3_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v10 -; GFX940-NEXT: v_mov_b32_e32 v13, v11 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v10 +; GFX942-NEXT: v_mov_b32_e32 v13, v11 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -9773,24 +9773,24 @@ define void @v_shuffle_v4p0_v4p0__7_4_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -9833,22 +9833,22 @@ define void @v_shuffle_v4p0_v4p0__7_6_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_6_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_6_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -9891,22 +9891,22 @@ define void @v_shuffle_v4p0_v4p0__7_7_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -9943,19 +9943,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -10002,25 +10002,25 @@ define void @v_shuffle_v4p0_v4p0__7_7_0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -10067,25 +10067,25 @@ define void @v_shuffle_v4p0_v4p0__7_7_1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -10132,25 +10132,25 @@ define void @v_shuffle_v4p0_v4p0__7_7_2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -10197,24 +10197,24 @@ define void @v_shuffle_v4p0_v4p0__7_7_3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v8, v6 -; GFX940-NEXT: v_mov_b32_e32 v9, v7 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v8, v6 +; GFX942-NEXT: v_mov_b32_e32 v9, v7 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -10251,19 +10251,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -10304,21 +10304,21 @@ define void @v_shuffle_v4p0_v4p0__7_7_6_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -10355,19 +10355,19 @@ define void @v_shuffle_v4p0_v4p0__u_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__u_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__u_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -10414,25 +10414,25 @@ define void @v_shuffle_v4p0_v4p0__0_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__0_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v6 -; GFX940-NEXT: v_mov_b32_e32 v9, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__0_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v6 +; GFX942-NEXT: v_mov_b32_e32 v9, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -10479,25 +10479,25 @@ define void @v_shuffle_v4p0_v4p0__1_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__1_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v8 -; GFX940-NEXT: v_mov_b32_e32 v11, v9 -; GFX940-NEXT: v_mov_b32_e32 v4, v8 -; GFX940-NEXT: v_mov_b32_e32 v5, v9 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__1_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v8 +; GFX942-NEXT: v_mov_b32_e32 v11, v9 +; GFX942-NEXT: v_mov_b32_e32 v4, v8 +; GFX942-NEXT: v_mov_b32_e32 v5, v9 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -10544,25 +10544,25 @@ define void @v_shuffle_v4p0_v4p0__2_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__2_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v10 -; GFX940-NEXT: v_mov_b32_e32 v13, v11 -; GFX940-NEXT: v_mov_b32_e32 v6, v10 -; GFX940-NEXT: v_mov_b32_e32 v7, v11 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__2_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v10 +; GFX942-NEXT: v_mov_b32_e32 v13, v11 +; GFX942-NEXT: v_mov_b32_e32 v6, v10 +; GFX942-NEXT: v_mov_b32_e32 v7, v11 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -10609,24 +10609,24 @@ define void @v_shuffle_v4p0_v4p0__3_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__3_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, v12 -; GFX940-NEXT: v_mov_b32_e32 v15, v13 -; GFX940-NEXT: v_mov_b32_e32 v10, v6 -; GFX940-NEXT: v_mov_b32_e32 v11, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__3_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, v12 +; GFX942-NEXT: v_mov_b32_e32 v15, v13 +; GFX942-NEXT: v_mov_b32_e32 v10, v6 +; GFX942-NEXT: v_mov_b32_e32 v11, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -10667,21 +10667,21 @@ define void @v_shuffle_v4p0_v4p0__4_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__4_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__4_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -10718,19 +10718,19 @@ define void @v_shuffle_v4p0_v4p0__5_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__5_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__5_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -10767,19 +10767,19 @@ define void @v_shuffle_v4p0_v4p0__6_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__6_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v4 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__6_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v4 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -10822,22 +10822,22 @@ define void @v_shuffle_v4p0_v4p0__7_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -10878,21 +10878,21 @@ define void @v_shuffle_v4p0_v4p0__7_u_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_u_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_u_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -10943,27 +10943,27 @@ define void @v_shuffle_v4p0_v4p0__7_0_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_0_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_0_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -11010,25 +11010,25 @@ define void @v_shuffle_v4p0_v4p0__7_1_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_1_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_1_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -11075,25 +11075,25 @@ define void @v_shuffle_v4p0_v4p0__7_2_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_2_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_2_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -11140,24 +11140,24 @@ define void @v_shuffle_v4p0_v4p0__7_3_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_3_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_3_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -11204,24 +11204,24 @@ define void @v_shuffle_v4p0_v4p0__7_4_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_4_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_4_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -11266,23 +11266,23 @@ define void @v_shuffle_v4p0_v4p0__7_5_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_5_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v4 -; GFX940-NEXT: v_mov_b32_e32 v9, v5 -; GFX940-NEXT: v_mov_b32_e32 v10, v4 -; GFX940-NEXT: v_mov_b32_e32 v11, v5 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_5_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v4 +; GFX942-NEXT: v_mov_b32_e32 v9, v5 +; GFX942-NEXT: v_mov_b32_e32 v10, v4 +; GFX942-NEXT: v_mov_b32_e32 v11, v5 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -11325,22 +11325,22 @@ define void @v_shuffle_v4p0_v4p0__7_7_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -11379,20 +11379,20 @@ define void @v_shuffle_v4p0_v4p0__7_7_u_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -11439,25 +11439,25 @@ define void @v_shuffle_v4p0_v4p0__7_7_0_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -11504,25 +11504,25 @@ define void @v_shuffle_v4p0_v4p0__7_7_1_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v8 -; GFX940-NEXT: v_mov_b32_e32 v5, v9 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v8 +; GFX942-NEXT: v_mov_b32_e32 v5, v9 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -11569,25 +11569,25 @@ define void @v_shuffle_v4p0_v4p0__7_7_2_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v10 -; GFX940-NEXT: v_mov_b32_e32 v7, v11 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v10 +; GFX942-NEXT: v_mov_b32_e32 v7, v11 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -11636,26 +11636,26 @@ define void @v_shuffle_v4p0_v4p0__7_7_3_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v6 -; GFX940-NEXT: v_mov_b32_e32 v11, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v6 +; GFX942-NEXT: v_mov_b32_e32 v11, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -11696,21 +11696,21 @@ define void @v_shuffle_v4p0_v4p0__7_7_4_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -11749,20 +11749,20 @@ define void @v_shuffle_v4p0_v4p0__7_7_5_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -11799,19 +11799,19 @@ define void @v_shuffle_v4p0_v4p0__u_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__u_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__u_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -11858,25 +11858,25 @@ define void @v_shuffle_v4p0_v4p0__0_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__0_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__0_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -11923,25 +11923,25 @@ define void @v_shuffle_v4p0_v4p0__1_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__1_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: v_mov_b32_e32 v4, v10 -; GFX940-NEXT: v_mov_b32_e32 v5, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__1_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: v_mov_b32_e32 v4, v10 +; GFX942-NEXT: v_mov_b32_e32 v5, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -11988,25 +11988,25 @@ define void @v_shuffle_v4p0_v4p0__2_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__2_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: v_mov_b32_e32 v6, v12 -; GFX940-NEXT: v_mov_b32_e32 v7, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__2_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: v_mov_b32_e32 v6, v12 +; GFX942-NEXT: v_mov_b32_e32 v7, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -12055,25 +12055,25 @@ define void @v_shuffle_v4p0_v4p0__3_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__3_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v12, v6 -; GFX940-NEXT: v_mov_b32_e32 v13, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__3_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v12, v6 +; GFX942-NEXT: v_mov_b32_e32 v13, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -12114,21 +12114,21 @@ define void @v_shuffle_v4p0_v4p0__4_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__4_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__4_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -12165,19 +12165,19 @@ define void @v_shuffle_v4p0_v4p0__5_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__5_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__5_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -12218,21 +12218,21 @@ define void @v_shuffle_v4p0_v4p0__6_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__6_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__6_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -12273,21 +12273,21 @@ define void @v_shuffle_v4p0_v4p0__7_u_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_u_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_u_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -12338,27 +12338,27 @@ define void @v_shuffle_v4p0_v4p0__7_0_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_0_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_0_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -12405,25 +12405,25 @@ define void @v_shuffle_v4p0_v4p0__7_1_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_1_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: v_mov_b32_e32 v0, v10 -; GFX940-NEXT: v_mov_b32_e32 v1, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_1_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: v_mov_b32_e32 v0, v10 +; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -12470,25 +12470,25 @@ define void @v_shuffle_v4p0_v4p0__7_2_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_2_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: v_mov_b32_e32 v2, v12 -; GFX940-NEXT: v_mov_b32_e32 v3, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_2_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: v_mov_b32_e32 v2, v12 +; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -12535,24 +12535,24 @@ define void @v_shuffle_v4p0_v4p0__7_3_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_3_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: v_mov_b32_e32 v4, v14 -; GFX940-NEXT: v_mov_b32_e32 v5, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_3_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: v_mov_b32_e32 v4, v14 +; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -12597,23 +12597,23 @@ define void @v_shuffle_v4p0_v4p0__7_4_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_4_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_4_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -12654,21 +12654,21 @@ define void @v_shuffle_v4p0_v4p0__7_5_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_5_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_5_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -12709,21 +12709,21 @@ define void @v_shuffle_v4p0_v4p0__7_6_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_6_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_6_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -12762,20 +12762,20 @@ define void @v_shuffle_v4p0_v4p0__7_7_u_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -12822,25 +12822,25 @@ define void @v_shuffle_v4p0_v4p0__7_7_0_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v8 -; GFX940-NEXT: v_mov_b32_e32 v3, v9 -; GFX940-NEXT: v_mov_b32_e32 v6, v8 -; GFX940-NEXT: v_mov_b32_e32 v7, v9 -; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v8 +; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b32_e32 v6, v8 +; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -12887,25 +12887,25 @@ define void @v_shuffle_v4p0_v4p0__7_7_1_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v12, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v10 -; GFX940-NEXT: v_mov_b32_e32 v5, v11 -; GFX940-NEXT: v_mov_b32_e32 v8, v10 -; GFX940-NEXT: v_mov_b32_e32 v9, v11 -; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v12, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v10 +; GFX942-NEXT: v_mov_b32_e32 v5, v11 +; GFX942-NEXT: v_mov_b32_e32 v8, v10 +; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -12952,25 +12952,25 @@ define void @v_shuffle_v4p0_v4p0__7_7_2_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v14, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:13] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v6, v12 -; GFX940-NEXT: v_mov_b32_e32 v7, v13 -; GFX940-NEXT: v_mov_b32_e32 v10, v12 -; GFX940-NEXT: v_mov_b32_e32 v11, v13 -; GFX940-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v14, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v6, v12 +; GFX942-NEXT: v_mov_b32_e32 v7, v13 +; GFX942-NEXT: v_mov_b32_e32 v10, v12 +; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -13019,26 +13019,26 @@ define void @v_shuffle_v4p0_v4p0__7_7_3_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v16, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v12, v6 -; GFX940-NEXT: v_mov_b32_e32 v13, v7 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v12, v14 -; GFX940-NEXT: v_mov_b32_e32 v13, v15 -; GFX940-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v12, v6 +; GFX942-NEXT: v_mov_b32_e32 v13, v7 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v12, v14 +; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -13079,21 +13079,21 @@ define void @v_shuffle_v4p0_v4p0__7_7_4_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -13130,19 +13130,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_5_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -13181,20 +13181,20 @@ define void @v_shuffle_v4p0_v4p0__7_7_6_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p0_v4p0__7_7_6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_mov_b32_e32 v4, v6 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_mov_b32_e32 v4, v6 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=v"() %vec1 = call <4 x ptr> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -13239,17 +13239,17 @@ define void @s_shuffle_v4p0_v4p0__0_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -13283,18 +13283,18 @@ define void @s_shuffle_v4p0_v4p0__1_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -13324,18 +13324,18 @@ define void @s_shuffle_v4p0_v4p0__2_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__2_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -13369,18 +13369,18 @@ define void @s_shuffle_v4p0_v4p0__3_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__3_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -13428,18 +13428,18 @@ define void @s_shuffle_v4p0_v4p0__5_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__5_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -13470,18 +13470,18 @@ define void @s_shuffle_v4p0_v4p0__6_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__6_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -13516,18 +13516,18 @@ define void @s_shuffle_v4p0_v4p0__7_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -13572,24 +13572,24 @@ define void @s_shuffle_v4p0_v4p0__7_0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -13630,21 +13630,21 @@ define void @s_shuffle_v4p0_v4p0__7_1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -13689,23 +13689,23 @@ define void @s_shuffle_v4p0_v4p0__7_2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -13746,23 +13746,23 @@ define void @s_shuffle_v4p0_v4p0__7_3_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -13801,20 +13801,20 @@ define void @s_shuffle_v4p0_v4p0__7_4_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -13873,20 +13873,20 @@ define void @s_shuffle_v4p0_v4p0__7_6_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_6_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -13921,20 +13921,20 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -13983,23 +13983,23 @@ define void @s_shuffle_v4p0_v4p0__7_7_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -14048,23 +14048,23 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -14105,23 +14105,23 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -14166,25 +14166,25 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -14223,22 +14223,22 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -14277,22 +14277,22 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -14357,22 +14357,22 @@ define void @s_shuffle_v4p0_v4p0__7_7_7_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -14425,27 +14425,27 @@ define void @s_shuffle_v4p0_v4p0__7_7_7_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -14498,27 +14498,27 @@ define void @s_shuffle_v4p0_v4p0__7_7_7_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -14571,27 +14571,27 @@ define void @s_shuffle_v4p0_v4p0__7_7_7_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -14640,25 +14640,25 @@ define void @s_shuffle_v4p0_v4p0__7_7_7_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -14705,24 +14705,24 @@ define void @s_shuffle_v4p0_v4p0__7_7_7_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -14765,24 +14765,24 @@ define void @s_shuffle_v4p0_v4p0__7_7_7_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -14829,24 +14829,24 @@ define void @s_shuffle_v4p0_v4p0__7_7_7_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -14913,22 +14913,22 @@ define void @s_shuffle_v4p0_v4p0__u_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -14997,24 +14997,24 @@ define void @s_shuffle_v4p0_v4p0__1_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -15056,24 +15056,24 @@ define void @s_shuffle_v4p0_v4p0__2_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -15119,24 +15119,24 @@ define void @s_shuffle_v4p0_v4p0__3_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -15178,22 +15178,22 @@ define void @s_shuffle_v4p0_v4p0__4_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__4_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__4_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -15245,27 +15245,27 @@ define void @s_shuffle_v4p0_v4p0__5_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__5_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -15318,25 +15318,25 @@ define void @s_shuffle_v4p0_v4p0__6_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__6_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -15389,27 +15389,27 @@ define void @s_shuffle_v4p0_v4p0__7_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -15458,25 +15458,25 @@ define void @s_shuffle_v4p0_v4p0__7_u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -15529,27 +15529,27 @@ define void @s_shuffle_v4p0_v4p0__7_1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -15602,27 +15602,27 @@ define void @s_shuffle_v4p0_v4p0__7_2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -15671,27 +15671,27 @@ define void @s_shuffle_v4p0_v4p0__7_3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -15744,27 +15744,27 @@ define void @s_shuffle_v4p0_v4p0__7_4_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -15813,25 +15813,25 @@ define void @s_shuffle_v4p0_v4p0__7_5_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -15884,27 +15884,27 @@ define void @s_shuffle_v4p0_v4p0__7_6_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_6_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -15957,25 +15957,25 @@ define void @s_shuffle_v4p0_v4p0__7_7_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -16024,23 +16024,23 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -16093,25 +16093,25 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -16160,27 +16160,27 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -16229,27 +16229,27 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -16298,25 +16298,25 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -16369,25 +16369,25 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -16436,25 +16436,25 @@ define void @s_shuffle_v4p0_v4p0__7_7_6_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -16635,25 +16635,25 @@ define void @s_shuffle_v4p0_v4p0__5_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__5_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: s_mov_b32 s14, s10 -; GFX940-NEXT: s_mov_b32 s15, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b32 s14, s10 +; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -16702,25 +16702,25 @@ define void @s_shuffle_v4p0_v4p0__6_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__6_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: s_mov_b32 s14, s10 -; GFX940-NEXT: s_mov_b32 s15, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b32 s14, s10 +; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -16769,25 +16769,25 @@ define void @s_shuffle_v4p0_v4p0__7_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: s_mov_b32 s14, s10 -; GFX940-NEXT: s_mov_b32 s15, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b32 s14, s10 +; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -16836,25 +16836,25 @@ define void @s_shuffle_v4p0_v4p0__7_u_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -16907,27 +16907,27 @@ define void @s_shuffle_v4p0_v4p0__7_0_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -16976,27 +16976,27 @@ define void @s_shuffle_v4p0_v4p0__7_2_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -17045,27 +17045,27 @@ define void @s_shuffle_v4p0_v4p0__7_3_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -17118,27 +17118,27 @@ define void @s_shuffle_v4p0_v4p0__7_4_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -17187,25 +17187,25 @@ define void @s_shuffle_v4p0_v4p0__7_5_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -17258,27 +17258,27 @@ define void @s_shuffle_v4p0_v4p0__7_6_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_6_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -17331,25 +17331,25 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -17398,23 +17398,23 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -17467,25 +17467,25 @@ define void @s_shuffle_v4p0_v4p0__7_7_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -17530,27 +17530,27 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -17595,27 +17595,27 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -17664,25 +17664,25 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -17735,25 +17735,25 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -17802,25 +17802,25 @@ define void @s_shuffle_v4p0_v4p0__7_7_6_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -18001,25 +18001,25 @@ define void @s_shuffle_v4p0_v4p0__5_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__5_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -18064,25 +18064,25 @@ define void @s_shuffle_v4p0_v4p0__6_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__6_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -18131,25 +18131,25 @@ define void @s_shuffle_v4p0_v4p0__7_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -18194,23 +18194,23 @@ define void @s_shuffle_v4p0_v4p0__7_u_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -18263,27 +18263,27 @@ define void @s_shuffle_v4p0_v4p0__7_0_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -18328,23 +18328,23 @@ define void @s_shuffle_v4p0_v4p0__7_1_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -18393,25 +18393,25 @@ define void @s_shuffle_v4p0_v4p0__7_3_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -18460,25 +18460,25 @@ define void @s_shuffle_v4p0_v4p0__7_4_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -18527,25 +18527,25 @@ define void @s_shuffle_v4p0_v4p0__7_5_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -18594,25 +18594,25 @@ define void @s_shuffle_v4p0_v4p0__7_6_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_6_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -18657,25 +18657,25 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -18720,25 +18720,25 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -18783,27 +18783,27 @@ define void @s_shuffle_v4p0_v4p0__7_7_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -18852,27 +18852,27 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -18921,27 +18921,27 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -18990,25 +18990,25 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s18 -; GFX940-NEXT: s_mov_b32 s9, s19 -; GFX940-NEXT: s_mov_b32 s10, s18 -; GFX940-NEXT: s_mov_b32 s11, s19 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: s_mov_b32 s10, s18 +; GFX942-NEXT: s_mov_b32 s11, s19 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -19057,27 +19057,27 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s18 -; GFX940-NEXT: s_mov_b32 s9, s19 -; GFX940-NEXT: s_mov_b32 s10, s18 -; GFX940-NEXT: s_mov_b32 s11, s19 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: s_mov_b32 s10, s18 +; GFX942-NEXT: s_mov_b32 s11, s19 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -19126,25 +19126,25 @@ define void @s_shuffle_v4p0_v4p0__7_7_6_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -19325,25 +19325,25 @@ define void @s_shuffle_v4p0_v4p0__5_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__5_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -19388,25 +19388,25 @@ define void @s_shuffle_v4p0_v4p0__6_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__6_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -19455,25 +19455,25 @@ define void @s_shuffle_v4p0_v4p0__7_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -19518,23 +19518,23 @@ define void @s_shuffle_v4p0_v4p0__7_u_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -19587,27 +19587,27 @@ define void @s_shuffle_v4p0_v4p0__7_0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: s_mov_b32 s14, s6 -; GFX940-NEXT: s_mov_b32 s15, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b32 s14, s6 +; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -19652,23 +19652,23 @@ define void @s_shuffle_v4p0_v4p0__7_1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -19717,25 +19717,25 @@ define void @s_shuffle_v4p0_v4p0__7_2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -19784,25 +19784,25 @@ define void @s_shuffle_v4p0_v4p0__7_4_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -19851,25 +19851,25 @@ define void @s_shuffle_v4p0_v4p0__7_5_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: s_mov_b32 s14, s6 -; GFX940-NEXT: s_mov_b32 s15, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b32 s14, s6 +; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -19918,25 +19918,25 @@ define void @s_shuffle_v4p0_v4p0__7_6_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_6_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -19981,25 +19981,25 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -20040,23 +20040,23 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -20101,27 +20101,27 @@ define void @s_shuffle_v4p0_v4p0__7_7_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s6 -; GFX940-NEXT: s_mov_b32 s15, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s6 +; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -20170,27 +20170,27 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s6 -; GFX940-NEXT: s_mov_b32 s15, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s6 +; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -20231,23 +20231,23 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -20292,25 +20292,25 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -20355,25 +20355,25 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -20422,25 +20422,25 @@ define void @s_shuffle_v4p0_v4p0__7_7_6_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s14, s6 -; GFX940-NEXT: s_mov_b32 s15, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s14, s6 +; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -20485,17 +20485,17 @@ define void @s_shuffle_v4p0_v4p0__0_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__0_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -20529,18 +20529,18 @@ define void @s_shuffle_v4p0_v4p0__1_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__1_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -20570,18 +20570,18 @@ define void @s_shuffle_v4p0_v4p0__2_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__2_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -20615,18 +20615,18 @@ define void @s_shuffle_v4p0_v4p0__3_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__3_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -20686,24 +20686,24 @@ define void @s_shuffle_v4p0_v4p0__5_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__5_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -20746,24 +20746,24 @@ define void @s_shuffle_v4p0_v4p0__6_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__6_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -20810,24 +20810,24 @@ define void @s_shuffle_v4p0_v4p0__7_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -20870,22 +20870,22 @@ define void @s_shuffle_v4p0_v4p0__7_u_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -20938,28 +20938,28 @@ define void @s_shuffle_v4p0_v4p0__7_0_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -21008,25 +21008,25 @@ define void @s_shuffle_v4p0_v4p0__7_1_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -21079,27 +21079,27 @@ define void @s_shuffle_v4p0_v4p0__7_2_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s22 -; GFX940-NEXT: s_mov_b32 s9, s23 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s16 -; GFX940-NEXT: s_mov_b32 s13, s17 -; GFX940-NEXT: s_mov_b32 s14, s16 -; GFX940-NEXT: s_mov_b32 s15, s17 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s22 +; GFX942-NEXT: s_mov_b32 s9, s23 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s16 +; GFX942-NEXT: s_mov_b32 s13, s17 +; GFX942-NEXT: s_mov_b32 s14, s16 +; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -21148,27 +21148,27 @@ define void @s_shuffle_v4p0_v4p0__7_3_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s22 -; GFX940-NEXT: s_mov_b32 s9, s23 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s16 -; GFX940-NEXT: s_mov_b32 s13, s17 -; GFX940-NEXT: s_mov_b32 s14, s16 -; GFX940-NEXT: s_mov_b32 s15, s17 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s22 +; GFX942-NEXT: s_mov_b32 s9, s23 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s16 +; GFX942-NEXT: s_mov_b32 s13, s17 +; GFX942-NEXT: s_mov_b32 s14, s16 +; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -21215,24 +21215,24 @@ define void @s_shuffle_v4p0_v4p0__7_5_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -21279,24 +21279,24 @@ define void @s_shuffle_v4p0_v4p0__7_6_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_6_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -21339,24 +21339,24 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -21395,22 +21395,22 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -21463,25 +21463,25 @@ define void @s_shuffle_v4p0_v4p0__7_7_0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -21534,25 +21534,25 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -21597,25 +21597,25 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -21664,27 +21664,27 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[16:23] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s22 -; GFX940-NEXT: s_mov_b32 s9, s23 -; GFX940-NEXT: s_mov_b32 s10, s22 -; GFX940-NEXT: s_mov_b32 s11, s23 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: s_mov_b32 s14, s16 -; GFX940-NEXT: s_mov_b32 s15, s17 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s22 +; GFX942-NEXT: s_mov_b32 s9, s23 +; GFX942-NEXT: s_mov_b32 s10, s22 +; GFX942-NEXT: s_mov_b32 s11, s23 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b32 s14, s16 +; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -21727,24 +21727,24 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -21791,24 +21791,24 @@ define void @s_shuffle_v4p0_v4p0__7_7_6_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s0 -; GFX940-NEXT: s_mov_b32 s15, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -21875,25 +21875,25 @@ define void @s_shuffle_v4p0_v4p0__0_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__0_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -21942,25 +21942,25 @@ define void @s_shuffle_v4p0_v4p0__1_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__1_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: s_mov_b32 s14, s10 -; GFX940-NEXT: s_mov_b32 s15, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b32 s14, s10 +; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -22009,25 +22009,25 @@ define void @s_shuffle_v4p0_v4p0__2_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__2_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: s_mov_b32 s14, s10 -; GFX940-NEXT: s_mov_b32 s15, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b32 s14, s10 +; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -22076,25 +22076,25 @@ define void @s_shuffle_v4p0_v4p0__3_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__3_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s10 -; GFX940-NEXT: s_mov_b32 s13, s11 -; GFX940-NEXT: s_mov_b32 s14, s10 -; GFX940-NEXT: s_mov_b32 s15, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s10 +; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b32 s14, s10 +; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -22231,22 +22231,22 @@ define void @s_shuffle_v4p0_v4p0__7_u_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -22295,28 +22295,28 @@ define void @s_shuffle_v4p0_v4p0__7_0_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: s_mov_b32 s14, s6 -; GFX940-NEXT: s_mov_b32 s15, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b32 s14, s6 +; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -22361,25 +22361,25 @@ define void @s_shuffle_v4p0_v4p0__7_1_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -22432,25 +22432,25 @@ define void @s_shuffle_v4p0_v4p0__7_2_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s18 -; GFX940-NEXT: s_mov_b32 s9, s19 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -22495,25 +22495,25 @@ define void @s_shuffle_v4p0_v4p0__7_3_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s18 -; GFX940-NEXT: s_mov_b32 s9, s19 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -22560,24 +22560,24 @@ define void @s_shuffle_v4p0_v4p0__7_4_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -22620,24 +22620,24 @@ define void @s_shuffle_v4p0_v4p0__7_6_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_6_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -22680,24 +22680,24 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -22736,22 +22736,22 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -22800,25 +22800,25 @@ define void @s_shuffle_v4p0_v4p0__7_7_0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s6 -; GFX940-NEXT: s_mov_b32 s15, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s6 +; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -22867,25 +22867,25 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s6 -; GFX940-NEXT: s_mov_b32 s15, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s6 +; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -22930,25 +22930,25 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -22997,25 +22997,25 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s18 -; GFX940-NEXT: s_mov_b32 s9, s19 -; GFX940-NEXT: s_mov_b32 s10, s18 -; GFX940-NEXT: s_mov_b32 s11, s19 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: s_mov_b32 s10, s18 +; GFX942-NEXT: s_mov_b32 s11, s19 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -23058,24 +23058,24 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -23118,24 +23118,24 @@ define void @s_shuffle_v4p0_v4p0__7_7_6_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s2 -; GFX940-NEXT: s_mov_b32 s15, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -23206,25 +23206,25 @@ define void @s_shuffle_v4p0_v4p0__0_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__0_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -23273,25 +23273,25 @@ define void @s_shuffle_v4p0_v4p0__1_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__1_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -23340,25 +23340,25 @@ define void @s_shuffle_v4p0_v4p0__2_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__2_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -23407,25 +23407,25 @@ define void @s_shuffle_v4p0_v4p0__3_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__3_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s12 -; GFX940-NEXT: s_mov_b32 s11, s13 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -23590,25 +23590,25 @@ define void @s_shuffle_v4p0_v4p0__7_0_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_0_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -23657,25 +23657,25 @@ define void @s_shuffle_v4p0_v4p0__7_1_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_1_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -23724,25 +23724,25 @@ define void @s_shuffle_v4p0_v4p0__7_2_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_2_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -23791,25 +23791,25 @@ define void @s_shuffle_v4p0_v4p0__7_3_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_3_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s14, s12 -; GFX940-NEXT: s_mov_b32 s15, s13 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s14, s12 +; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -23856,24 +23856,24 @@ define void @s_shuffle_v4p0_v4p0__7_4_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_4_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s4 -; GFX940-NEXT: s_mov_b32 s13, s5 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s4 +; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -23962,22 +23962,22 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -24030,27 +24030,27 @@ define void @s_shuffle_v4p0_v4p0__7_7_0_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s18 -; GFX940-NEXT: s_mov_b32 s9, s19 -; GFX940-NEXT: s_mov_b32 s10, s18 -; GFX940-NEXT: s_mov_b32 s11, s19 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s16 -; GFX940-NEXT: s_mov_b32 s15, s17 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: s_mov_b32 s10, s18 +; GFX942-NEXT: s_mov_b32 s11, s19 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s16 +; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -24103,27 +24103,27 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s18 -; GFX940-NEXT: s_mov_b32 s9, s19 -; GFX940-NEXT: s_mov_b32 s10, s18 -; GFX940-NEXT: s_mov_b32 s11, s19 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s16 -; GFX940-NEXT: s_mov_b32 s15, s17 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: s_mov_b32 s10, s18 +; GFX942-NEXT: s_mov_b32 s11, s19 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s16 +; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -24172,25 +24172,25 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -24243,27 +24243,27 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[12:19] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s18 -; GFX940-NEXT: s_mov_b32 s9, s19 -; GFX940-NEXT: s_mov_b32 s10, s18 -; GFX940-NEXT: s_mov_b32 s11, s19 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: s_mov_b32 s14, s16 -; GFX940-NEXT: s_mov_b32 s15, s17 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: s_mov_b32 s10, s18 +; GFX942-NEXT: s_mov_b32 s11, s19 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b32 s14, s16 +; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -24306,24 +24306,24 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -24370,24 +24370,24 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s4 -; GFX940-NEXT: s_mov_b32 s15, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s4 +; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -24458,25 +24458,25 @@ define void @s_shuffle_v4p0_v4p0__0_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__0_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: s_mov_b32 s14, s6 -; GFX940-NEXT: s_mov_b32 s15, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b32 s14, s6 +; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -24525,25 +24525,25 @@ define void @s_shuffle_v4p0_v4p0__1_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__1_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -24592,25 +24592,25 @@ define void @s_shuffle_v4p0_v4p0__2_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__2_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s4 -; GFX940-NEXT: s_mov_b32 s9, s5 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s4 +; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -24659,25 +24659,25 @@ define void @s_shuffle_v4p0_v4p0__3_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__3_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -24818,25 +24818,25 @@ define void @s_shuffle_v4p0_v4p0__7_0_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_0_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -24885,25 +24885,25 @@ define void @s_shuffle_v4p0_v4p0__7_1_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_1_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: s_mov_b32 s14, s6 -; GFX940-NEXT: s_mov_b32 s15, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b32 s14, s6 +; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -24952,25 +24952,25 @@ define void @s_shuffle_v4p0_v4p0__7_2_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_2_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -25019,25 +25019,25 @@ define void @s_shuffle_v4p0_v4p0__7_3_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_3_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s14 -; GFX940-NEXT: s_mov_b32 s13, s15 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s14 +; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -25084,24 +25084,24 @@ define void @s_shuffle_v4p0_v4p0__7_4_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_4_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: s_mov_b32 s14, s6 -; GFX940-NEXT: s_mov_b32 s15, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b32 s14, s6 +; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -25218,25 +25218,25 @@ define void @s_shuffle_v4p0_v4p0__7_7_0_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -25285,25 +25285,25 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -25352,25 +25352,25 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s14, s6 -; GFX940-NEXT: s_mov_b32 s15, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s14, s6 +; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -25419,25 +25419,25 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s14 -; GFX940-NEXT: s_mov_b32 s9, s15 -; GFX940-NEXT: s_mov_b32 s10, s14 -; GFX940-NEXT: s_mov_b32 s11, s15 -; GFX940-NEXT: s_mov_b32 s12, s6 -; GFX940-NEXT: s_mov_b32 s13, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b32 s10, s14 +; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b32 s12, s6 +; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -25480,24 +25480,24 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s0 -; GFX940-NEXT: s_mov_b32 s13, s1 -; GFX940-NEXT: s_mov_b32 s14, s6 -; GFX940-NEXT: s_mov_b32 s15, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b32 s14, s6 +; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -25544,24 +25544,24 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p0_v4p0__7_7_5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s6 -; GFX940-NEXT: s_mov_b32 s11, s7 -; GFX940-NEXT: s_mov_b32 s12, s2 -; GFX940-NEXT: s_mov_b32 s13, s3 -; GFX940-NEXT: s_mov_b32 s14, s6 -; GFX940-NEXT: s_mov_b32 s15, s7 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:15] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s6 +; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b32 s14, s6 +; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v2p3.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v2p3.ll index f9253ca1b1ea4..3b5690562c38a 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v2p3.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v2p3.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v4p3_v2p3__u_u_u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v4p3_v2p3__0_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -79,17 +79,17 @@ define void @v_shuffle_v4p3_v2p3__1_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -132,17 +132,17 @@ define void @v_shuffle_v4p3_v2p3__3_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -181,21 +181,21 @@ define void @v_shuffle_v4p3_v2p3__3_0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -234,21 +234,21 @@ define void @v_shuffle_v4p3_v2p3__3_1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -282,19 +282,19 @@ define void @v_shuffle_v4p3_v2p3__3_2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -327,17 +327,17 @@ define void @v_shuffle_v4p3_v2p3__3_3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -376,20 +376,20 @@ define void @v_shuffle_v4p3_v2p3__3_3_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_3_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_3_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -429,21 +429,21 @@ define void @v_shuffle_v4p3_v2p3__3_3_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_3_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_3_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -478,19 +478,19 @@ define void @v_shuffle_v4p3_v2p3__3_3_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_3_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_3_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -525,18 +525,18 @@ define void @v_shuffle_v4p3_v2p3__3_3_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_3_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_3_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -577,21 +577,21 @@ define void @v_shuffle_v4p3_v2p3__3_3_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_3_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_3_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -632,21 +632,21 @@ define void @v_shuffle_v4p3_v2p3__3_3_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_3_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_3_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -682,18 +682,18 @@ define void @v_shuffle_v4p3_v2p3__3_3_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_3_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_3_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -730,19 +730,19 @@ define void @v_shuffle_v4p3_v2p3__3_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -779,19 +779,19 @@ define void @v_shuffle_v4p3_v2p3__u_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -827,19 +827,19 @@ define void @v_shuffle_v4p3_v2p3__0_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__0_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <4 x i32> zeroinitializer store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -874,18 +874,18 @@ define void @v_shuffle_v4p3_v2p3__1_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -921,19 +921,19 @@ define void @v_shuffle_v4p3_v2p3__2_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -974,21 +974,21 @@ define void @v_shuffle_v4p3_v2p3__3_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1029,21 +1029,21 @@ define void @v_shuffle_v4p3_v2p3__3_u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1086,22 +1086,22 @@ define void @v_shuffle_v4p3_v2p3__3_1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1142,21 +1142,21 @@ define void @v_shuffle_v4p3_v2p3__3_2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1197,21 +1197,21 @@ define void @v_shuffle_v4p3_v2p3__3_3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1252,21 +1252,21 @@ define void @v_shuffle_v4p3_v2p3__3_3_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_3_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_3_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1307,21 +1307,21 @@ define void @v_shuffle_v4p3_v2p3__3_3_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_3_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_3_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1364,22 +1364,22 @@ define void @v_shuffle_v4p3_v2p3__3_3_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_3_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_3_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1414,19 +1414,19 @@ define void @v_shuffle_v4p3_v2p3__u_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__u_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__u_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1460,19 +1460,19 @@ define void @v_shuffle_v4p3_v2p3__0_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__0_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__0_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1508,19 +1508,19 @@ define void @v_shuffle_v4p3_v2p3__1_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__1_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__1_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1554,19 +1554,19 @@ define void @v_shuffle_v4p3_v2p3__2_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__2_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__2_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1608,22 +1608,22 @@ define void @v_shuffle_v4p3_v2p3__3_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1664,21 +1664,21 @@ define void @v_shuffle_v4p3_v2p3__3_u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1719,21 +1719,21 @@ define void @v_shuffle_v4p3_v2p3__3_0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1774,21 +1774,21 @@ define void @v_shuffle_v4p3_v2p3__3_2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1829,21 +1829,21 @@ define void @v_shuffle_v4p3_v2p3__3_3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1882,20 +1882,20 @@ define void @v_shuffle_v4p3_v2p3__3_3_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_3_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_3_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1934,20 +1934,20 @@ define void @v_shuffle_v4p3_v2p3__3_3_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_3_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_3_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1990,22 +1990,22 @@ define void @v_shuffle_v4p3_v2p3__3_3_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_3_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_3_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2047,16 +2047,16 @@ define void @v_shuffle_v4p3_v2p3__0_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__0_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2088,17 +2088,17 @@ define void @v_shuffle_v4p3_v2p3__1_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__1_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2144,18 +2144,18 @@ define void @v_shuffle_v4p3_v2p3__3_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2190,18 +2190,18 @@ define void @v_shuffle_v4p3_v2p3__3_u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2242,21 +2242,21 @@ define void @v_shuffle_v4p3_v2p3__3_0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2297,21 +2297,21 @@ define void @v_shuffle_v4p3_v2p3__3_1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2348,19 +2348,19 @@ define void @v_shuffle_v4p3_v2p3__3_3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2397,19 +2397,19 @@ define void @v_shuffle_v4p3_v2p3__3_3_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_3_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_3_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2451,23 +2451,23 @@ define void @v_shuffle_v4p3_v2p3__3_3_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_3_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_3_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2508,21 +2508,21 @@ define void @v_shuffle_v4p3_v2p3__3_3_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_3_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_3_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2557,19 +2557,19 @@ define void @v_shuffle_v4p3_v2p3__u_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__u_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__u_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2610,21 +2610,21 @@ define void @v_shuffle_v4p3_v2p3__0_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__0_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2667,22 +2667,22 @@ define void @v_shuffle_v4p3_v2p3__1_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2717,19 +2717,19 @@ define void @v_shuffle_v4p3_v2p3__2_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__2_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2764,18 +2764,18 @@ define void @v_shuffle_v4p3_v2p3__3_u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2816,21 +2816,21 @@ define void @v_shuffle_v4p3_v2p3__3_0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2871,21 +2871,21 @@ define void @v_shuffle_v4p3_v2p3__3_1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2920,18 +2920,18 @@ define void @v_shuffle_v4p3_v2p3__3_2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2966,18 +2966,18 @@ define void @v_shuffle_v4p3_v2p3__3_3_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_3_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_3_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v2, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -3018,21 +3018,21 @@ define void @v_shuffle_v4p3_v2p3__3_3_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_3_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_3_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -3074,22 +3074,22 @@ define void @v_shuffle_v4p3_v2p3__3_3_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_3_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_3_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -3124,19 +3124,19 @@ define void @v_shuffle_v4p3_v2p3__3_3_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v2p3__3_3_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v2p3__3_3_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -3181,17 +3181,17 @@ define void @s_shuffle_v4p3_v2p3__0_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v2p3__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v2p3__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x ptr addrspace(3)> %shuf) @@ -3223,17 +3223,17 @@ define void @s_shuffle_v4p3_v2p3__1_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v2p3__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v2p3__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x ptr addrspace(3)> %shuf) @@ -3279,17 +3279,17 @@ define void @s_shuffle_v4p3_v2p3__3_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v2p3__3_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v2p3__3_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -3330,21 +3330,21 @@ define void @s_shuffle_v4p3_v2p3__3_0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v2p3__3_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v2p3__3_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -3383,20 +3383,20 @@ define void @s_shuffle_v4p3_v2p3__3_1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v2p3__3_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v2p3__3_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -3431,18 +3431,18 @@ define void @s_shuffle_v4p3_v2p3__3_2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v2p3__3_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v2p3__3_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -3524,21 +3524,21 @@ define void @s_shuffle_v4p3_v2p3__3_3_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v2p3__3_3_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s9 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v2p3__3_3_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s9 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -3621,22 +3621,22 @@ define void @s_shuffle_v4p3_v2p3__3_3_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v2p3__3_3_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s9 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v2p3__3_3_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s9 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -3698,20 +3698,20 @@ define void @s_shuffle_v4p3_v2p3__3_3_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v2p3__3_3_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v2p3__3_3_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -3855,22 +3855,22 @@ define void @s_shuffle_v4p3_v2p3__3_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v2p3__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[10:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v2p3__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[10:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -3911,21 +3911,21 @@ define void @s_shuffle_v4p3_v2p3__3_u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v2p3__3_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[10:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v2p3__3_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[10:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -3968,22 +3968,22 @@ define void @s_shuffle_v4p3_v2p3__3_1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v2p3__3_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[10:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v2p3__3_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[10:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4026,22 +4026,22 @@ define void @s_shuffle_v4p3_v2p3__3_2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v2p3__3_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[10:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v2p3__3_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[10:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4105,21 +4105,21 @@ define void @s_shuffle_v4p3_v2p3__3_3_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v2p3__3_3_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s9 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v2p3__3_3_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s9 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4162,22 +4162,22 @@ define void @s_shuffle_v4p3_v2p3__3_3_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v2p3__3_3_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s9 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v2p3__3_3_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s9 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4220,22 +4220,22 @@ define void @s_shuffle_v4p3_v2p3__3_3_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v2p3__3_3_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[10:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v2p3__3_3_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[10:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4355,22 +4355,22 @@ define void @s_shuffle_v4p3_v2p3__3_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v2p3__3_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v2p3__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4411,21 +4411,21 @@ define void @s_shuffle_v4p3_v2p3__3_u_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v2p3__3_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[10:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v2p3__3_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[10:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4468,22 +4468,22 @@ define void @s_shuffle_v4p3_v2p3__3_0_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v2p3__3_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[10:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v2p3__3_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[10:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4526,22 +4526,22 @@ define void @s_shuffle_v4p3_v2p3__3_2_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v2p3__3_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[10:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v2p3__3_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[10:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4651,22 +4651,22 @@ define void @s_shuffle_v4p3_v2p3__3_3_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v2p3__3_3_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[10:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v2p3__3_3_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[10:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4711,17 +4711,17 @@ define void @s_shuffle_v4p3_v2p3__0_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v2p3__0_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v2p3__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x ptr addrspace(3)> %shuf) @@ -4753,17 +4753,17 @@ define void @s_shuffle_v4p3_v2p3__1_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v2p3__1_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v2p3__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x ptr addrspace(3)> %shuf) @@ -4860,22 +4860,22 @@ define void @s_shuffle_v4p3_v2p3__3_0_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v2p3__3_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[10:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v2p3__3_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[10:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4956,19 +4956,19 @@ define void @s_shuffle_v4p3_v2p3__3_3_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v2p3__3_3_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v2p3__3_3_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -5011,22 +5011,22 @@ define void @s_shuffle_v4p3_v2p3__3_3_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v2p3__3_3_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[10:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v2p3__3_3_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[10:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -5071,23 +5071,23 @@ define void @s_shuffle_v4p3_v2p3__3_3_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v2p3__3_3_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[2:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v2p3__3_3_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[2:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -5173,22 +5173,22 @@ define void @s_shuffle_v4p3_v2p3__1_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v2p3__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v2p3__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -5271,22 +5271,22 @@ define void @s_shuffle_v4p3_v2p3__3_0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v2p3__3_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[10:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v2p3__3_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[10:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> @@ -5416,22 +5416,22 @@ define void @s_shuffle_v4p3_v2p3__3_3_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v2p3__3_3_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:9] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:1] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s9 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v2p3__3_3_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:1] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s9 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <2 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr addrspace(3)> %vec0, <2 x ptr addrspace(3)> %vec1, <4 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v3p3.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v3p3.ll index 28bc61ce57815..8039e126590b9 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v3p3.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v3p3.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v4p3_v3p3__u_u_u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v4p3_v3p3__0_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -79,17 +79,17 @@ define void @v_shuffle_v4p3_v3p3__1_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -121,17 +121,17 @@ define void @v_shuffle_v4p3_v3p3__2_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__2_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__2_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -174,17 +174,17 @@ define void @v_shuffle_v4p3_v3p3__4_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__4_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__4_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -217,17 +217,17 @@ define void @v_shuffle_v4p3_v3p3__5_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -267,21 +267,21 @@ define void @v_shuffle_v4p3_v3p3__5_0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -320,21 +320,21 @@ define void @v_shuffle_v4p3_v3p3__5_1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -375,21 +375,21 @@ define void @v_shuffle_v4p3_v3p3__5_2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -423,19 +423,19 @@ define void @v_shuffle_v4p3_v3p3__5_3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -468,17 +468,17 @@ define void @v_shuffle_v4p3_v3p3__5_4_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -513,18 +513,18 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -565,22 +565,22 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -622,23 +622,23 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -679,22 +679,22 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -729,19 +729,19 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -777,19 +777,19 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -824,18 +824,18 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -877,22 +877,22 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -933,22 +933,22 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -990,22 +990,22 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1043,21 +1043,21 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1094,19 +1094,19 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1143,19 +1143,19 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1191,19 +1191,19 @@ define void @v_shuffle_v4p3_v3p3__u_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1239,19 +1239,19 @@ define void @v_shuffle_v4p3_v3p3__0_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__0_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <4 x i32> zeroinitializer store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1287,20 +1287,20 @@ define void @v_shuffle_v4p3_v3p3__1_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1337,21 +1337,21 @@ define void @v_shuffle_v4p3_v3p3__2_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1386,19 +1386,19 @@ define void @v_shuffle_v4p3_v3p3__3_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1440,23 +1440,23 @@ define void @v_shuffle_v4p3_v3p3__4_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__4_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__4_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1500,23 +1500,23 @@ define void @v_shuffle_v4p3_v3p3__5_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1559,22 +1559,22 @@ define void @v_shuffle_v4p3_v3p3__5_u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1618,23 +1618,23 @@ define void @v_shuffle_v4p3_v3p3__5_1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1678,23 +1678,23 @@ define void @v_shuffle_v4p3_v3p3__5_2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1738,23 +1738,23 @@ define void @v_shuffle_v4p3_v3p3__5_3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v9, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:8] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v9, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v9, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:8] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v9, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1796,22 +1796,22 @@ define void @v_shuffle_v4p3_v3p3__5_4_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1855,23 +1855,23 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1914,22 +1914,22 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1972,23 +1972,23 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2032,23 +2032,23 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2092,23 +2092,23 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v9, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:8] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v8 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: global_store_dwordx4 v9, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v9, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:8] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v8 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: global_store_dwordx4 v9, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2151,23 +2151,23 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2202,18 +2202,18 @@ define void @v_shuffle_v4p3_v3p3__u_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__u_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__u_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2247,18 +2247,18 @@ define void @v_shuffle_v4p3_v3p3__0_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__0_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__0_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2294,19 +2294,19 @@ define void @v_shuffle_v4p3_v3p3__1_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__1_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__1_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2342,19 +2342,19 @@ define void @v_shuffle_v4p3_v3p3__2_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__2_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__2_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2388,18 +2388,18 @@ define void @v_shuffle_v4p3_v3p3__3_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__3_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -2441,23 +2441,23 @@ define void @v_shuffle_v4p3_v3p3__4_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__4_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__4_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2500,23 +2500,23 @@ define void @v_shuffle_v4p3_v3p3__5_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2558,23 +2558,23 @@ define void @v_shuffle_v4p3_v3p3__5_u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2616,22 +2616,22 @@ define void @v_shuffle_v4p3_v3p3__5_0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2674,23 +2674,23 @@ define void @v_shuffle_v4p3_v3p3__5_2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2733,22 +2733,22 @@ define void @v_shuffle_v4p3_v3p3__5_3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2789,22 +2789,22 @@ define void @v_shuffle_v4p3_v3p3__5_4_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2847,23 +2847,23 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2905,22 +2905,22 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2961,22 +2961,22 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -3019,23 +3019,23 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -3078,23 +3078,23 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -3137,23 +3137,23 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -3188,18 +3188,18 @@ define void @v_shuffle_v4p3_v3p3__u_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__u_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__u_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3233,18 +3233,18 @@ define void @v_shuffle_v4p3_v3p3__0_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__0_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3280,19 +3280,19 @@ define void @v_shuffle_v4p3_v3p3__1_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__1_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3328,19 +3328,19 @@ define void @v_shuffle_v4p3_v3p3__2_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__2_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__2_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3374,18 +3374,18 @@ define void @v_shuffle_v4p3_v3p3__3_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__3_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__3_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3427,22 +3427,22 @@ define void @v_shuffle_v4p3_v3p3__4_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__4_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__4_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -3485,22 +3485,22 @@ define void @v_shuffle_v4p3_v3p3__5_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -3541,21 +3541,21 @@ define void @v_shuffle_v4p3_v3p3__5_u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -3598,23 +3598,23 @@ define void @v_shuffle_v4p3_v3p3__5_0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:8] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:8] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -3655,21 +3655,21 @@ define void @v_shuffle_v4p3_v3p3__5_1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -3712,22 +3712,22 @@ define void @v_shuffle_v4p3_v3p3__5_3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -3769,22 +3769,22 @@ define void @v_shuffle_v4p3_v3p3__5_4_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -3827,22 +3827,22 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -3884,22 +3884,22 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -3942,22 +3942,22 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:8] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v8 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:8] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v8 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4000,23 +4000,23 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4060,23 +4060,23 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[6:8] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: v_mov_b32_e32 v0, v8 -; GFX940-NEXT: v_mov_b32_e32 v1, v8 -; GFX940-NEXT: v_mov_b32_e32 v2, v6 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[6:8] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: v_mov_b32_e32 v0, v8 +; GFX942-NEXT: v_mov_b32_e32 v1, v8 +; GFX942-NEXT: v_mov_b32_e32 v2, v6 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4120,24 +4120,24 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4179,16 +4179,16 @@ define void @v_shuffle_v4p3_v3p3__0_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__0_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4220,17 +4220,17 @@ define void @v_shuffle_v4p3_v3p3__1_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4262,17 +4262,17 @@ define void @v_shuffle_v4p3_v3p3__2_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__2_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4319,20 +4319,20 @@ define void @v_shuffle_v4p3_v3p3__4_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__4_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__4_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4370,21 +4370,21 @@ define void @v_shuffle_v4p3_v3p3__5_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4421,20 +4421,20 @@ define void @v_shuffle_v4p3_v3p3__5_u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4477,23 +4477,23 @@ define void @v_shuffle_v4p3_v3p3__5_0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4535,23 +4535,23 @@ define void @v_shuffle_v4p3_v3p3__5_1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4595,23 +4595,23 @@ define void @v_shuffle_v4p3_v3p3__5_2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4649,21 +4649,21 @@ define void @v_shuffle_v4p3_v3p3__5_4_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4701,21 +4701,21 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4752,19 +4752,19 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4806,23 +4806,23 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4864,23 +4864,23 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4922,23 +4922,23 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4975,19 +4975,19 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -5022,18 +5022,18 @@ define void @v_shuffle_v4p3_v3p3__u_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__u_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__u_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -5074,22 +5074,22 @@ define void @v_shuffle_v4p3_v3p3__0_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__0_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__0_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -5132,23 +5132,23 @@ define void @v_shuffle_v4p3_v3p3__1_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__1_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__1_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -5191,23 +5191,23 @@ define void @v_shuffle_v4p3_v3p3__2_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__2_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__2_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -5242,18 +5242,18 @@ define void @v_shuffle_v4p3_v3p3__3_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__3_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__3_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -5290,19 +5290,19 @@ define void @v_shuffle_v4p3_v3p3__4_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__4_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__4_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -5339,19 +5339,19 @@ define void @v_shuffle_v4p3_v3p3__5_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -5387,19 +5387,19 @@ define void @v_shuffle_v4p3_v3p3__5_u_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -5442,23 +5442,23 @@ define void @v_shuffle_v4p3_v3p3__5_0_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -5499,22 +5499,22 @@ define void @v_shuffle_v4p3_v3p3__5_1_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -5558,23 +5558,23 @@ define void @v_shuffle_v4p3_v3p3__5_2_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -5610,19 +5610,19 @@ define void @v_shuffle_v4p3_v3p3__5_3_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -5659,19 +5659,19 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -5707,19 +5707,19 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -5762,23 +5762,23 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -5822,23 +5822,23 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -5881,23 +5881,23 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -5932,19 +5932,19 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -5979,18 +5979,18 @@ define void @v_shuffle_v4p3_v3p3__u_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__u_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__u_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -6032,23 +6032,23 @@ define void @v_shuffle_v4p3_v3p3__0_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__0_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__0_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -6091,23 +6091,23 @@ define void @v_shuffle_v4p3_v3p3__1_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__1_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__1_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -6150,23 +6150,23 @@ define void @v_shuffle_v4p3_v3p3__2_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__2_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__2_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -6201,18 +6201,18 @@ define void @v_shuffle_v4p3_v3p3__3_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__3_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__3_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -6249,19 +6249,19 @@ define void @v_shuffle_v4p3_v3p3__4_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__4_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__4_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -6296,18 +6296,18 @@ define void @v_shuffle_v4p3_v3p3__5_u_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -6350,22 +6350,22 @@ define void @v_shuffle_v4p3_v3p3__5_0_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -6408,23 +6408,23 @@ define void @v_shuffle_v4p3_v3p3__5_1_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -6467,23 +6467,23 @@ define void @v_shuffle_v4p3_v3p3__5_2_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -6520,20 +6520,20 @@ define void @v_shuffle_v4p3_v3p3__5_3_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -6568,18 +6568,18 @@ define void @v_shuffle_v4p3_v3p3__5_4_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -6615,19 +6615,19 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -6670,23 +6670,23 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v6 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v6 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -6729,23 +6729,23 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -6788,23 +6788,23 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v6 -; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v6 +; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -6841,19 +6841,19 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -6889,19 +6889,19 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v3p3__5_5_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:4] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:4] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -6946,17 +6946,17 @@ define void @s_shuffle_v4p3_v3p3__0_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x ptr addrspace(3)> %shuf) @@ -6988,17 +6988,17 @@ define void @s_shuffle_v4p3_v3p3__1_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x ptr addrspace(3)> %shuf) @@ -7030,17 +7030,17 @@ define void @s_shuffle_v4p3_v3p3__2_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__2_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__2_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x ptr addrspace(3)> %shuf) @@ -7086,17 +7086,17 @@ define void @s_shuffle_v4p3_v3p3__4_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__4_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__4_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -7129,17 +7129,17 @@ define void @s_shuffle_v4p3_v3p3__5_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -7180,21 +7180,21 @@ define void @s_shuffle_v4p3_v3p3__5_0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -7233,20 +7233,20 @@ define void @s_shuffle_v4p3_v3p3__5_1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -7287,21 +7287,21 @@ define void @s_shuffle_v4p3_v3p3__5_2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -7336,18 +7336,18 @@ define void @s_shuffle_v4p3_v3p3__5_3_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -7401,18 +7401,18 @@ define void @s_shuffle_v4p3_v3p3__5_5_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -7455,22 +7455,22 @@ define void @s_shuffle_v4p3_v3p3__5_5_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -7513,22 +7513,22 @@ define void @s_shuffle_v4p3_v3p3__5_5_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -7569,21 +7569,21 @@ define void @s_shuffle_v4p3_v3p3__5_5_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -7620,19 +7620,19 @@ define void @s_shuffle_v4p3_v3p3__5_5_3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -7669,19 +7669,19 @@ define void @s_shuffle_v4p3_v3p3__5_5_4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -7744,22 +7744,22 @@ define void @s_shuffle_v4p3_v3p3__5_5_5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -7802,22 +7802,22 @@ define void @s_shuffle_v4p3_v3p3__5_5_5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -7860,22 +7860,22 @@ define void @s_shuffle_v4p3_v3p3__5_5_5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -7914,20 +7914,20 @@ define void @s_shuffle_v4p3_v3p3__5_5_5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -7966,20 +7966,20 @@ define void @s_shuffle_v4p3_v3p3__5_5_5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -8037,19 +8037,19 @@ define void @s_shuffle_v4p3_v3p3__u_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x ptr addrspace(3)> %shuf) @@ -8107,20 +8107,20 @@ define void @s_shuffle_v4p3_v3p3__1_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x ptr addrspace(3)> %shuf) @@ -8158,20 +8158,20 @@ define void @s_shuffle_v4p3_v3p3__2_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x ptr addrspace(3)> %shuf) @@ -8207,19 +8207,19 @@ define void @s_shuffle_v4p3_v3p3__3_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x ptr addrspace(3)> %shuf) @@ -8263,23 +8263,23 @@ define void @s_shuffle_v4p3_v3p3__4_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__4_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__4_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -8324,23 +8324,23 @@ define void @s_shuffle_v4p3_v3p3__5_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -8383,22 +8383,22 @@ define void @s_shuffle_v4p3_v3p3__5_u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -8443,23 +8443,23 @@ define void @s_shuffle_v4p3_v3p3__5_1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -8504,23 +8504,23 @@ define void @s_shuffle_v4p3_v3p3__5_2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -8565,23 +8565,23 @@ define void @s_shuffle_v4p3_v3p3__5_3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -8624,22 +8624,22 @@ define void @s_shuffle_v4p3_v3p3__5_4_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -8684,23 +8684,23 @@ define void @s_shuffle_v4p3_v3p3__5_5_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -8743,22 +8743,22 @@ define void @s_shuffle_v4p3_v3p3__5_5_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -8803,23 +8803,23 @@ define void @s_shuffle_v4p3_v3p3__5_5_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -8864,23 +8864,23 @@ define void @s_shuffle_v4p3_v3p3__5_5_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -8925,23 +8925,23 @@ define void @s_shuffle_v4p3_v3p3__5_5_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -8986,23 +8986,23 @@ define void @s_shuffle_v4p3_v3p3__5_5_4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -9142,22 +9142,22 @@ define void @s_shuffle_v4p3_v3p3__4_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__4_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__4_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -9200,22 +9200,22 @@ define void @s_shuffle_v4p3_v3p3__5_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -9258,22 +9258,22 @@ define void @s_shuffle_v4p3_v3p3__5_u_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -9318,23 +9318,23 @@ define void @s_shuffle_v4p3_v3p3__5_0_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -9379,23 +9379,23 @@ define void @s_shuffle_v4p3_v3p3__5_2_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -9440,23 +9440,23 @@ define void @s_shuffle_v4p3_v3p3__5_3_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -9499,22 +9499,22 @@ define void @s_shuffle_v4p3_v3p3__5_4_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -9559,23 +9559,23 @@ define void @s_shuffle_v4p3_v3p3__5_5_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -9618,22 +9618,22 @@ define void @s_shuffle_v4p3_v3p3__5_5_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -9678,23 +9678,23 @@ define void @s_shuffle_v4p3_v3p3__5_5_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -9739,23 +9739,23 @@ define void @s_shuffle_v4p3_v3p3__5_5_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -9800,23 +9800,23 @@ define void @s_shuffle_v4p3_v3p3__5_5_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -9861,23 +9861,23 @@ define void @s_shuffle_v4p3_v3p3__5_5_4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -10017,22 +10017,22 @@ define void @s_shuffle_v4p3_v3p3__4_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__4_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__4_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -10075,22 +10075,22 @@ define void @s_shuffle_v4p3_v3p3__5_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -10131,21 +10131,21 @@ define void @s_shuffle_v4p3_v3p3__5_u_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -10190,23 +10190,23 @@ define void @s_shuffle_v4p3_v3p3__5_0_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -10247,21 +10247,21 @@ define void @s_shuffle_v4p3_v3p3__5_1_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -10304,22 +10304,22 @@ define void @s_shuffle_v4p3_v3p3__5_3_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -10362,22 +10362,22 @@ define void @s_shuffle_v4p3_v3p3__5_4_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -10420,22 +10420,22 @@ define void @s_shuffle_v4p3_v3p3__5_5_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -10478,22 +10478,22 @@ define void @s_shuffle_v4p3_v3p3__5_5_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -10538,23 +10538,23 @@ define void @s_shuffle_v4p3_v3p3__5_5_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -10599,23 +10599,23 @@ define void @s_shuffle_v4p3_v3p3__5_5_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -10660,23 +10660,23 @@ define void @s_shuffle_v4p3_v3p3__5_5_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -10721,23 +10721,23 @@ define void @s_shuffle_v4p3_v3p3__5_5_4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -10782,17 +10782,17 @@ define void @s_shuffle_v4p3_v3p3__0_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__0_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x ptr addrspace(3)> %shuf) @@ -10824,17 +10824,17 @@ define void @s_shuffle_v4p3_v3p3__1_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x ptr addrspace(3)> %shuf) @@ -10866,17 +10866,17 @@ define void @s_shuffle_v4p3_v3p3__2_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__2_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x ptr addrspace(3)> %shuf) @@ -10928,20 +10928,20 @@ define void @s_shuffle_v4p3_v3p3__4_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__4_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__4_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -10980,20 +10980,20 @@ define void @s_shuffle_v4p3_v3p3__5_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -11030,19 +11030,19 @@ define void @s_shuffle_v4p3_v3p3__5_u_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -11087,23 +11087,23 @@ define void @s_shuffle_v4p3_v3p3__5_0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -11146,22 +11146,22 @@ define void @s_shuffle_v4p3_v3p3__5_1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -11206,23 +11206,23 @@ define void @s_shuffle_v4p3_v3p3__5_2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -11261,20 +11261,20 @@ define void @s_shuffle_v4p3_v3p3__5_4_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -11313,20 +11313,20 @@ define void @s_shuffle_v4p3_v3p3__5_5_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -11363,19 +11363,19 @@ define void @s_shuffle_v4p3_v3p3__5_5_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -11420,23 +11420,23 @@ define void @s_shuffle_v4p3_v3p3__5_5_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -11481,23 +11481,23 @@ define void @s_shuffle_v4p3_v3p3__5_5_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -11540,22 +11540,22 @@ define void @s_shuffle_v4p3_v3p3__5_5_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -11594,20 +11594,20 @@ define void @s_shuffle_v4p3_v3p3__5_5_4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -11670,22 +11670,22 @@ define void @s_shuffle_v4p3_v3p3__0_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__0_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__0_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -11728,22 +11728,22 @@ define void @s_shuffle_v4p3_v3p3__1_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__1_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__1_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -11786,22 +11786,22 @@ define void @s_shuffle_v4p3_v3p3__2_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__2_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__2_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -11900,19 +11900,19 @@ define void @s_shuffle_v4p3_v3p3__5_u_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -11957,23 +11957,23 @@ define void @s_shuffle_v4p3_v3p3__5_0_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -12016,22 +12016,22 @@ define void @s_shuffle_v4p3_v3p3__5_1_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -12076,23 +12076,23 @@ define void @s_shuffle_v4p3_v3p3__5_2_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -12131,20 +12131,20 @@ define void @s_shuffle_v4p3_v3p3__5_3_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -12183,20 +12183,20 @@ define void @s_shuffle_v4p3_v3p3__5_5_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -12233,19 +12233,19 @@ define void @s_shuffle_v4p3_v3p3__5_5_u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -12290,23 +12290,23 @@ define void @s_shuffle_v4p3_v3p3__5_5_0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -12351,23 +12351,23 @@ define void @s_shuffle_v4p3_v3p3__5_5_1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -12410,22 +12410,22 @@ define void @s_shuffle_v4p3_v3p3__5_5_2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -12464,20 +12464,20 @@ define void @s_shuffle_v4p3_v3p3__5_5_3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -12540,22 +12540,22 @@ define void @s_shuffle_v4p3_v3p3__0_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__0_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__0_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -12598,22 +12598,22 @@ define void @s_shuffle_v4p3_v3p3__1_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__1_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__1_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -12656,22 +12656,22 @@ define void @s_shuffle_v4p3_v3p3__2_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__2_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__2_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -12775,22 +12775,22 @@ define void @s_shuffle_v4p3_v3p3__5_0_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -12833,22 +12833,22 @@ define void @s_shuffle_v4p3_v3p3__5_1_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -12891,22 +12891,22 @@ define void @s_shuffle_v4p3_v3p3__5_2_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s10 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -12945,20 +12945,20 @@ define void @s_shuffle_v4p3_v3p3__5_3_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -13015,19 +13015,19 @@ define void @s_shuffle_v4p3_v3p3__5_5_u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -13072,23 +13072,23 @@ define void @s_shuffle_v4p3_v3p3__5_5_0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -13133,23 +13133,23 @@ define void @s_shuffle_v4p3_v3p3__5_5_1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:6] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:6] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -13192,22 +13192,22 @@ define void @s_shuffle_v4p3_v3p3__5_5_2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:10] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:10] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -13246,20 +13246,20 @@ define void @s_shuffle_v4p3_v3p3__5_5_3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> @@ -13298,20 +13298,20 @@ define void @s_shuffle_v4p3_v3p3__5_5_4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v3p3__5_5_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:2] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v3p3__5_5_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:2] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <3 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr addrspace(3)> %vec0, <3 x ptr addrspace(3)> %vec1, <4 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v4p3.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v4p3.ll index 9cc1b9fe6cf0e..eeab42ae40d7f 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v4p3.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v4p3.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX942 %s define void @v_shuffle_v4p3_v4p3__u_u_u_u(ptr addrspace(1) inreg %ptr) { @@ -38,16 +38,16 @@ define void @v_shuffle_v4p3_v4p3__0_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -79,17 +79,17 @@ define void @v_shuffle_v4p3_v4p3__1_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -121,17 +121,17 @@ define void @v_shuffle_v4p3_v4p3__2_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__2_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__2_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -163,17 +163,17 @@ define void @v_shuffle_v4p3_v4p3__3_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__3_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__3_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -216,17 +216,17 @@ define void @v_shuffle_v4p3_v4p3__5_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__5_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__5_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -259,17 +259,17 @@ define void @v_shuffle_v4p3_v4p3__6_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__6_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__6_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -302,17 +302,17 @@ define void @v_shuffle_v4p3_v4p3__7_u_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -352,21 +352,21 @@ define void @v_shuffle_v4p3_v4p3__7_0_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -405,21 +405,21 @@ define void @v_shuffle_v4p3_v4p3__7_1_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -459,21 +459,21 @@ define void @v_shuffle_v4p3_v4p3__7_2_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -514,21 +514,21 @@ define void @v_shuffle_v4p3_v4p3__7_3_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -562,17 +562,17 @@ define void @v_shuffle_v4p3_v4p3__7_4_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -605,17 +605,17 @@ define void @v_shuffle_v4p3_v4p3__7_5_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_5_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -649,17 +649,17 @@ define void @v_shuffle_v4p3_v4p3__7_6_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_6_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_6_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -694,18 +694,18 @@ define void @v_shuffle_v4p3_v4p3__7_7_u_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -748,23 +748,23 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -807,23 +807,23 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -864,22 +864,22 @@ define void @v_shuffle_v4p3_v4p3__7_7_2_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -922,22 +922,22 @@ define void @v_shuffle_v4p3_v4p3__7_7_3_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -973,18 +973,18 @@ define void @v_shuffle_v4p3_v4p3__7_7_4_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1019,18 +1019,18 @@ define void @v_shuffle_v4p3_v4p3__7_7_5_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1065,18 +1065,18 @@ define void @v_shuffle_v4p3_v4p3__7_7_6_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_6_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1113,19 +1113,19 @@ define void @v_shuffle_v4p3_v4p3__7_7_7_u(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1169,23 +1169,23 @@ define void @v_shuffle_v4p3_v4p3__7_7_7_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1230,24 +1230,24 @@ define void @v_shuffle_v4p3_v4p3__7_7_7_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1291,23 +1291,23 @@ define void @v_shuffle_v4p3_v4p3__7_7_7_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[6:7], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[6:7], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1350,23 +1350,23 @@ define void @v_shuffle_v4p3_v4p3__7_7_7_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1402,18 +1402,18 @@ define void @v_shuffle_v4p3_v4p3__7_7_7_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1452,21 +1452,21 @@ define void @v_shuffle_v4p3_v4p3__7_7_7_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1503,18 +1503,18 @@ define void @v_shuffle_v4p3_v4p3__7_7_7_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1551,19 +1551,19 @@ define void @v_shuffle_v4p3_v4p3__7_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1600,19 +1600,19 @@ define void @v_shuffle_v4p3_v4p3__u_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1648,19 +1648,19 @@ define void @v_shuffle_v4p3_v4p3__0_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__0_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> zeroinitializer store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1696,19 +1696,19 @@ define void @v_shuffle_v4p3_v4p3__1_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1745,19 +1745,19 @@ define void @v_shuffle_v4p3_v4p3__2_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1794,19 +1794,19 @@ define void @v_shuffle_v4p3_v4p3__3_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1842,19 +1842,19 @@ define void @v_shuffle_v4p3_v4p3__4_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__4_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__4_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -1897,23 +1897,23 @@ define void @v_shuffle_v4p3_v4p3__5_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__5_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__5_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -1958,24 +1958,24 @@ define void @v_shuffle_v4p3_v4p3__6_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__6_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__6_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2019,23 +2019,23 @@ define void @v_shuffle_v4p3_v4p3__7_0_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2078,23 +2078,23 @@ define void @v_shuffle_v4p3_v4p3__7_u_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2139,24 +2139,24 @@ define void @v_shuffle_v4p3_v4p3__7_1_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2199,22 +2199,22 @@ define void @v_shuffle_v4p3_v4p3__7_2_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[6:7], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[6:7], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2258,22 +2258,22 @@ define void @v_shuffle_v4p3_v4p3__7_3_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2317,23 +2317,23 @@ define void @v_shuffle_v4p3_v4p3__7_4_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2376,23 +2376,23 @@ define void @v_shuffle_v4p3_v4p3__7_5_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2436,23 +2436,23 @@ define void @v_shuffle_v4p3_v4p3__7_6_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_6_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[4:5] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_6_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[4:5] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2497,24 +2497,24 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2557,23 +2557,23 @@ define void @v_shuffle_v4p3_v4p3__7_7_u_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2617,23 +2617,23 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2678,23 +2678,23 @@ define void @v_shuffle_v4p3_v4p3__7_7_2_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v0 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v0 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2737,22 +2737,22 @@ define void @v_shuffle_v4p3_v4p3__7_7_3_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2796,22 +2796,22 @@ define void @v_shuffle_v4p3_v4p3__7_7_4_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2854,23 +2854,23 @@ define void @v_shuffle_v4p3_v4p3__7_7_5_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2913,23 +2913,23 @@ define void @v_shuffle_v4p3_v4p3__7_7_6_0(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -2964,18 +2964,18 @@ define void @v_shuffle_v4p3_v4p3__u_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__u_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__u_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3009,18 +3009,18 @@ define void @v_shuffle_v4p3_v4p3__0_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__0_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__0_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3056,19 +3056,19 @@ define void @v_shuffle_v4p3_v4p3__1_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__1_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__1_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3104,19 +3104,19 @@ define void @v_shuffle_v4p3_v4p3__2_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__2_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__2_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3152,19 +3152,19 @@ define void @v_shuffle_v4p3_v4p3__3_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__3_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3198,18 +3198,18 @@ define void @v_shuffle_v4p3_v4p3__4_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__4_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__4_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -3251,23 +3251,23 @@ define void @v_shuffle_v4p3_v4p3__5_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__5_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__5_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -3310,23 +3310,23 @@ define void @v_shuffle_v4p3_v4p3__6_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__6_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__6_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -3369,23 +3369,23 @@ define void @v_shuffle_v4p3_v4p3__7_1_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -3428,23 +3428,23 @@ define void @v_shuffle_v4p3_v4p3__7_u_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -3488,23 +3488,23 @@ define void @v_shuffle_v4p3_v4p3__7_0_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -3548,22 +3548,22 @@ define void @v_shuffle_v4p3_v4p3__7_2_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[6:7], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[6:7], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -3606,22 +3606,22 @@ define void @v_shuffle_v4p3_v4p3__7_3_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -3665,23 +3665,23 @@ define void @v_shuffle_v4p3_v4p3__7_4_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -3724,23 +3724,23 @@ define void @v_shuffle_v4p3_v4p3__7_5_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -3784,23 +3784,23 @@ define void @v_shuffle_v4p3_v4p3__7_6_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_6_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[4:5] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_6_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[4:5] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -3845,24 +3845,24 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -3905,23 +3905,23 @@ define void @v_shuffle_v4p3_v4p3__7_7_u_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -3966,24 +3966,24 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4028,23 +4028,23 @@ define void @v_shuffle_v4p3_v4p3__7_7_2_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4089,23 +4089,23 @@ define void @v_shuffle_v4p3_v4p3__7_7_3_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4149,22 +4149,22 @@ define void @v_shuffle_v4p3_v4p3__7_7_4_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4207,22 +4207,22 @@ define void @v_shuffle_v4p3_v4p3__7_7_5_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4265,23 +4265,23 @@ define void @v_shuffle_v4p3_v4p3__7_7_6_1(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4316,18 +4316,18 @@ define void @v_shuffle_v4p3_v4p3__u_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__u_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__u_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4361,18 +4361,18 @@ define void @v_shuffle_v4p3_v4p3__0_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__0_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4407,18 +4407,18 @@ define void @v_shuffle_v4p3_v4p3__1_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__1_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4454,19 +4454,19 @@ define void @v_shuffle_v4p3_v4p3__2_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__2_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__2_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4501,18 +4501,18 @@ define void @v_shuffle_v4p3_v4p3__3_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__3_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__3_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4546,18 +4546,18 @@ define void @v_shuffle_v4p3_v4p3__4_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__4_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__4_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -4598,22 +4598,22 @@ define void @v_shuffle_v4p3_v4p3__5_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__5_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__5_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4656,22 +4656,22 @@ define void @v_shuffle_v4p3_v4p3__6_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__6_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__6_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4713,22 +4713,22 @@ define void @v_shuffle_v4p3_v4p3__7_2_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4769,21 +4769,21 @@ define void @v_shuffle_v4p3_v4p3__7_u_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4826,21 +4826,21 @@ define void @v_shuffle_v4p3_v4p3__7_0_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4881,21 +4881,21 @@ define void @v_shuffle_v4p3_v4p3__7_1_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4938,22 +4938,22 @@ define void @v_shuffle_v4p3_v4p3__7_3_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -4995,21 +4995,21 @@ define void @v_shuffle_v4p3_v4p3__7_4_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[4:5] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[4:5] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -5052,22 +5052,22 @@ define void @v_shuffle_v4p3_v4p3__7_5_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v2 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v2 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -5109,21 +5109,21 @@ define void @v_shuffle_v4p3_v4p3__7_6_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_6_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[6:7] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_6_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[6:7] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -5166,22 +5166,22 @@ define void @v_shuffle_v4p3_v4p3__7_7_2_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -5224,22 +5224,22 @@ define void @v_shuffle_v4p3_v4p3__7_7_u_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -5284,23 +5284,23 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v2 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v2 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -5344,22 +5344,22 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -5403,22 +5403,22 @@ define void @v_shuffle_v4p3_v4p3__7_7_3_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -5462,22 +5462,22 @@ define void @v_shuffle_v4p3_v4p3__7_7_4_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v7 -; GFX940-NEXT: v_mov_b32_e32 v8, v4 -; GFX940-NEXT: v_mov_b32_e32 v9, v2 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v7 +; GFX942-NEXT: v_mov_b32_e32 v8, v4 +; GFX942-NEXT: v_mov_b32_e32 v9, v2 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -5520,23 +5520,23 @@ define void @v_shuffle_v4p3_v4p3__7_7_5_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -5579,22 +5579,22 @@ define void @v_shuffle_v4p3_v4p3__7_7_6_2(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v7, v2 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v7, v2 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -5629,18 +5629,18 @@ define void @v_shuffle_v4p3_v4p3__u_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__u_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__u_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -5674,18 +5674,18 @@ define void @v_shuffle_v4p3_v4p3__0_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__0_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -5721,19 +5721,19 @@ define void @v_shuffle_v4p3_v4p3__1_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__1_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__1_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -5769,19 +5769,19 @@ define void @v_shuffle_v4p3_v4p3__2_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__2_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -5817,19 +5817,19 @@ define void @v_shuffle_v4p3_v4p3__3_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__3_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__3_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -5863,18 +5863,18 @@ define void @v_shuffle_v4p3_v4p3__4_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__4_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__4_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -5916,22 +5916,22 @@ define void @v_shuffle_v4p3_v4p3__5_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__5_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__5_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -5974,22 +5974,22 @@ define void @v_shuffle_v4p3_v4p3__6_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__6_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v6 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__6_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v6 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -6032,22 +6032,22 @@ define void @v_shuffle_v4p3_v4p3__7_3_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -6088,21 +6088,21 @@ define void @v_shuffle_v4p3_v4p3__7_u_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -6145,21 +6145,21 @@ define void @v_shuffle_v4p3_v4p3__7_0_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[0:1] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[0:1] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -6200,21 +6200,21 @@ define void @v_shuffle_v4p3_v4p3__7_1_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -6256,22 +6256,22 @@ define void @v_shuffle_v4p3_v4p3__7_2_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -6313,21 +6313,21 @@ define void @v_shuffle_v4p3_v4p3__7_4_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[4:5] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[4:5] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -6370,22 +6370,22 @@ define void @v_shuffle_v4p3_v4p3__7_5_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -6427,21 +6427,21 @@ define void @v_shuffle_v4p3_v4p3__7_6_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_6_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[6:7] op_sel:[1,0] -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_6_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[6:7] op_sel:[1,0] +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -6484,22 +6484,22 @@ define void @v_shuffle_v4p3_v4p3__7_7_3_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -6540,22 +6540,22 @@ define void @v_shuffle_v4p3_v4p3__7_7_u_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -6600,23 +6600,23 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -6661,23 +6661,23 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v1 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v1 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -6718,22 +6718,22 @@ define void @v_shuffle_v4p3_v4p3__7_7_2_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -6776,23 +6776,23 @@ define void @v_shuffle_v4p3_v4p3__7_7_4_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -6835,23 +6835,23 @@ define void @v_shuffle_v4p3_v4p3__7_7_5_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -6894,22 +6894,22 @@ define void @v_shuffle_v4p3_v4p3__7_7_6_3(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -6951,16 +6951,16 @@ define void @v_shuffle_v4p3_v4p3__0_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__0_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__0_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -6992,17 +6992,17 @@ define void @v_shuffle_v4p3_v4p3__1_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__1_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__1_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -7034,17 +7034,17 @@ define void @v_shuffle_v4p3_v4p3__2_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__2_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__2_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -7076,17 +7076,17 @@ define void @v_shuffle_v4p3_v4p3__3_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__3_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__3_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> store <4 x ptr addrspace(3)> %shuf, ptr addrspace(1) %ptr, align 16 @@ -7133,19 +7133,19 @@ define void @v_shuffle_v4p3_v4p3__5_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__5_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__5_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -7183,19 +7183,19 @@ define void @v_shuffle_v4p3_v4p3__6_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__6_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__6_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -7233,19 +7233,19 @@ define void @v_shuffle_v4p3_v4p3__7_4_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -7282,19 +7282,19 @@ define void @v_shuffle_v4p3_v4p3__7_u_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -7338,23 +7338,23 @@ define void @v_shuffle_v4p3_v4p3__7_0_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v2 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v2 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -7396,23 +7396,23 @@ define void @v_shuffle_v4p3_v4p3__7_1_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -7455,23 +7455,23 @@ define void @v_shuffle_v4p3_v4p3__7_2_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -7516,23 +7516,23 @@ define void @v_shuffle_v4p3_v4p3__7_3_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -7571,20 +7571,20 @@ define void @v_shuffle_v4p3_v4p3__7_5_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -7621,19 +7621,19 @@ define void @v_shuffle_v4p3_v4p3__7_6_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_6_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_6_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -7671,19 +7671,19 @@ define void @v_shuffle_v4p3_v4p3__7_7_4_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -7719,18 +7719,18 @@ define void @v_shuffle_v4p3_v4p3__7_7_u_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v0 -; GFX940-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v0 +; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -7774,22 +7774,22 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: v_mov_b32_e32 v7, v2 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: v_mov_b32_e32 v7, v2 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -7833,23 +7833,23 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -7891,23 +7891,23 @@ define void @v_shuffle_v4p3_v4p3__7_7_2_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -7951,23 +7951,23 @@ define void @v_shuffle_v4p3_v4p3__7_7_3_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[4:5] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[4:5] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -8003,18 +8003,18 @@ define void @v_shuffle_v4p3_v4p3__7_7_5_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -8053,21 +8053,21 @@ define void @v_shuffle_v4p3_v4p3__7_7_6_4(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v0 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v0 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -8102,18 +8102,18 @@ define void @v_shuffle_v4p3_v4p3__u_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__u_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__u_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -8154,22 +8154,22 @@ define void @v_shuffle_v4p3_v4p3__0_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__0_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__0_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -8212,23 +8212,23 @@ define void @v_shuffle_v4p3_v4p3__1_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__1_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__1_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -8271,22 +8271,22 @@ define void @v_shuffle_v4p3_v4p3__2_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__2_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__2_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -8329,22 +8329,22 @@ define void @v_shuffle_v4p3_v4p3__3_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__3_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__3_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -8379,18 +8379,18 @@ define void @v_shuffle_v4p3_v4p3__4_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__4_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__4_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -8427,19 +8427,19 @@ define void @v_shuffle_v4p3_v4p3__5_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__5_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__5_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -8476,19 +8476,19 @@ define void @v_shuffle_v4p3_v4p3__6_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__6_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__6_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -8525,19 +8525,19 @@ define void @v_shuffle_v4p3_v4p3__7_5_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -8574,19 +8574,19 @@ define void @v_shuffle_v4p3_v4p3__7_u_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -8629,22 +8629,22 @@ define void @v_shuffle_v4p3_v4p3__7_0_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -8685,22 +8685,22 @@ define void @v_shuffle_v4p3_v4p3__7_1_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -8744,23 +8744,23 @@ define void @v_shuffle_v4p3_v4p3__7_2_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -8805,23 +8805,23 @@ define void @v_shuffle_v4p3_v4p3__7_3_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -8859,19 +8859,19 @@ define void @v_shuffle_v4p3_v4p3__7_4_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -8908,19 +8908,19 @@ define void @v_shuffle_v4p3_v4p3__7_6_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_6_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_6_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -8957,19 +8957,19 @@ define void @v_shuffle_v4p3_v4p3__7_7_5_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -9004,18 +9004,18 @@ define void @v_shuffle_v4p3_v4p3__7_7_u_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -9058,22 +9058,22 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -9116,22 +9116,22 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, v1 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, v1 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -9174,23 +9174,23 @@ define void @v_shuffle_v4p3_v4p3__7_7_2_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -9235,23 +9235,23 @@ define void @v_shuffle_v4p3_v4p3__7_7_3_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -9288,19 +9288,19 @@ define void @v_shuffle_v4p3_v4p3__7_7_4_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -9339,21 +9339,21 @@ define void @v_shuffle_v4p3_v4p3__7_7_6_5(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -9388,18 +9388,18 @@ define void @v_shuffle_v4p3_v4p3__u_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__u_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__u_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -9441,23 +9441,23 @@ define void @v_shuffle_v4p3_v4p3__0_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__0_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__0_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -9499,22 +9499,22 @@ define void @v_shuffle_v4p3_v4p3__1_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__1_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[4:5] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__1_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[4:5] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -9557,22 +9557,22 @@ define void @v_shuffle_v4p3_v4p3__2_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__2_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, v6 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v7, v6 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__2_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, v6 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v7, v6 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -9614,22 +9614,22 @@ define void @v_shuffle_v4p3_v4p3__3_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__3_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[6:7] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v7, v6 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__3_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[6:7] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v7, v6 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -9664,18 +9664,18 @@ define void @v_shuffle_v4p3_v4p3__4_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__4_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__4_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -9711,18 +9711,18 @@ define void @v_shuffle_v4p3_v4p3__5_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__5_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__5_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -9759,19 +9759,19 @@ define void @v_shuffle_v4p3_v4p3__6_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__6_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__6_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -9807,18 +9807,18 @@ define void @v_shuffle_v4p3_v4p3__7_6_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -9853,18 +9853,18 @@ define void @v_shuffle_v4p3_v4p3__7_u_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_u_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_u_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -9906,22 +9906,22 @@ define void @v_shuffle_v4p3_v4p3__7_0_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_0_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v5, v4 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_0_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v5, v4 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -9964,23 +9964,23 @@ define void @v_shuffle_v4p3_v4p3__7_1_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_1_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v4 -; GFX940-NEXT: v_mov_b32_e32 v3, v4 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_1_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: v_mov_b32_e32 v3, v4 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -10022,22 +10022,22 @@ define void @v_shuffle_v4p3_v4p3__7_2_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_2_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[4:5], v[6:7], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v7, v6 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_2_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[6:7], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v7, v6 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -10080,22 +10080,22 @@ define void @v_shuffle_v4p3_v4p3__7_3_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_3_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v7, v6 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_3_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v7, v6 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -10131,18 +10131,18 @@ define void @v_shuffle_v4p3_v4p3__7_4_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_4_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_4_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -10177,18 +10177,18 @@ define void @v_shuffle_v4p3_v4p3__7_5_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_5_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_5_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -10225,19 +10225,19 @@ define void @v_shuffle_v4p3_v4p3__7_7_6_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -10274,19 +10274,19 @@ define void @v_shuffle_v4p3_v4p3__7_7_u_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v3, v2 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -10331,23 +10331,23 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v10, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, v0 -; GFX940-NEXT: v_mov_b32_e32 v6, v5 -; GFX940-NEXT: v_mov_b32_e32 v7, v5 -; GFX940-NEXT: v_mov_b32_e32 v9, v4 -; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v10, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, v0 +; GFX942-NEXT: v_mov_b32_e32 v6, v5 +; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b32_e32 v9, v4 +; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -10391,23 +10391,23 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[4:5] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[4:5] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -10450,23 +10450,23 @@ define void @v_shuffle_v4p3_v4p3__7_7_2_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v3, v6 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v3, v6 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -10510,23 +10510,23 @@ define void @v_shuffle_v4p3_v4p3__7_7_3_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[6:7] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[6:7] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -10565,21 +10565,21 @@ define void @v_shuffle_v4p3_v4p3__7_7_4_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v2 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v2 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -10616,18 +10616,18 @@ define void @v_shuffle_v4p3_v4p3__7_7_5_6(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -10662,18 +10662,18 @@ define void @v_shuffle_v4p3_v4p3__u_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__u_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__u_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -10716,23 +10716,23 @@ define void @v_shuffle_v4p3_v4p3__0_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__0_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__0_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -10775,23 +10775,23 @@ define void @v_shuffle_v4p3_v4p3__1_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__1_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v1 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__1_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v1 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -10834,22 +10834,22 @@ define void @v_shuffle_v4p3_v4p3__2_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__2_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: v_mov_b32_e32 v6, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__2_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: v_mov_b32_e32 v6, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -10892,22 +10892,22 @@ define void @v_shuffle_v4p3_v4p3__3_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__3_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__3_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -10942,18 +10942,18 @@ define void @v_shuffle_v4p3_v4p3__4_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__4_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__4_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -10990,19 +10990,19 @@ define void @v_shuffle_v4p3_v4p3__5_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__5_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v1 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__5_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v1 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -11039,19 +11039,19 @@ define void @v_shuffle_v4p3_v4p3__6_7_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__6_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__6_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -11086,18 +11086,18 @@ define void @v_shuffle_v4p3_v4p3__7_u_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_u_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_u_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -11139,22 +11139,22 @@ define void @v_shuffle_v4p3_v4p3__7_0_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_0_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v4, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_0_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v4, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -11197,23 +11197,23 @@ define void @v_shuffle_v4p3_v4p3__7_1_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_1_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v5 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_1_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v5 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -11255,22 +11255,22 @@ define void @v_shuffle_v4p3_v4p3__7_2_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_2_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_pk_mov_b32 v[4:5], v[6:7], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v6, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_2_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[6:7], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v6, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -11313,22 +11313,22 @@ define void @v_shuffle_v4p3_v4p3__7_3_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_3_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_3_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -11365,18 +11365,18 @@ define void @v_shuffle_v4p3_v4p3__7_4_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_4_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_4_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -11411,18 +11411,18 @@ define void @v_shuffle_v4p3_v4p3__7_5_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_5_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_5_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -11458,18 +11458,18 @@ define void @v_shuffle_v4p3_v4p3__7_6_7_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_6_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_6_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0] +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -11504,18 +11504,18 @@ define void @v_shuffle_v4p3_v4p3__7_7_u_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_u_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -11558,23 +11558,23 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v0 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v0 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -11617,23 +11617,23 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v6, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[2:5] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v5 -; GFX940-NEXT: v_mov_b32_e32 v3, v5 -; GFX940-NEXT: v_mov_b32_e32 v4, v1 -; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v6, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[2:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v5 +; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b32_e32 v4, v1 +; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -11676,23 +11676,23 @@ define void @v_shuffle_v4p3_v4p3__7_7_2_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v7 -; GFX940-NEXT: v_mov_b32_e32 v1, v7 -; GFX940-NEXT: v_mov_b32_e32 v3, v7 -; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v7 +; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -11735,22 +11735,22 @@ define void @v_shuffle_v4p3_v4p3__7_7_3_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, v7 -; GFX940-NEXT: v_mov_b32_e32 v5, v7 -; GFX940-NEXT: v_mov_b32_e32 v6, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, v7 +; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b32_e32 v6, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -11789,21 +11789,21 @@ define void @v_shuffle_v4p3_v4p3__7_7_4_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -11842,21 +11842,21 @@ define void @v_shuffle_v4p3_v4p3__7_7_5_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v8, 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_mov_b32_e32 v4, v3 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v6, v1 -; GFX940-NEXT: v_mov_b32_e32 v7, v3 -; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_mov_b32_e32 v4, v3 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v6, v1 +; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -11891,18 +11891,18 @@ define void @v_shuffle_v4p3_v4p3__7_7_6_7(ptr addrspace(1) inreg %ptr) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: v_shuffle_v4p3_v4p3__7_7_6_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def v[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v3 -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def v[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -11947,17 +11947,17 @@ define void @s_shuffle_v4p3_v4p3__0_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__0_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__0_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x ptr addrspace(3)> %shuf) @@ -11989,17 +11989,17 @@ define void @s_shuffle_v4p3_v4p3__1_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__1_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__1_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x ptr addrspace(3)> %shuf) @@ -12031,17 +12031,17 @@ define void @s_shuffle_v4p3_v4p3__2_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__2_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__2_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x ptr addrspace(3)> %shuf) @@ -12073,17 +12073,17 @@ define void @s_shuffle_v4p3_v4p3__3_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__3_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__3_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x ptr addrspace(3)> %shuf) @@ -12129,17 +12129,17 @@ define void @s_shuffle_v4p3_v4p3__5_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__5_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__5_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -12172,17 +12172,17 @@ define void @s_shuffle_v4p3_v4p3__6_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__6_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__6_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -12215,17 +12215,17 @@ define void @s_shuffle_v4p3_v4p3__7_u_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_u_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_u_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -12266,21 +12266,21 @@ define void @s_shuffle_v4p3_v4p3__7_0_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_0_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_0_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -12319,20 +12319,20 @@ define void @s_shuffle_v4p3_v4p3__7_1_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_1_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_1_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -12373,21 +12373,21 @@ define void @s_shuffle_v4p3_v4p3__7_2_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_2_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_2_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -12428,21 +12428,21 @@ define void @s_shuffle_v4p3_v4p3__7_3_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_3_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -12477,18 +12477,18 @@ define void @s_shuffle_v4p3_v4p3__7_4_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_4_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -12542,18 +12542,18 @@ define void @s_shuffle_v4p3_v4p3__7_6_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_6_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_6_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -12588,18 +12588,18 @@ define void @s_shuffle_v4p3_v4p3__7_7_u_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_u_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -12642,22 +12642,22 @@ define void @s_shuffle_v4p3_v4p3__7_7_0_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_0_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -12700,22 +12700,22 @@ define void @s_shuffle_v4p3_v4p3__7_7_1_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_1_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_1_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -12756,21 +12756,21 @@ define void @s_shuffle_v4p3_v4p3__7_7_2_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_2_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -12813,22 +12813,22 @@ define void @s_shuffle_v4p3_v4p3__7_7_3_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_3_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -12865,19 +12865,19 @@ define void @s_shuffle_v4p3_v4p3__7_7_4_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_4_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -12914,19 +12914,19 @@ define void @s_shuffle_v4p3_v4p3__7_7_5_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_5_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -12983,19 +12983,19 @@ define void @s_shuffle_v4p3_v4p3__7_7_7_u() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_7_u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_7_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -13040,23 +13040,23 @@ define void @s_shuffle_v4p3_v4p3__7_7_7_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_7_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s7 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_7_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s7 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -13101,23 +13101,23 @@ define void @s_shuffle_v4p3_v4p3__7_7_7_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_7_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s7 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s7 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -13162,23 +13162,23 @@ define void @s_shuffle_v4p3_v4p3__7_7_7_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_7_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s7 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_7_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s7 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -13221,22 +13221,22 @@ define void @s_shuffle_v4p3_v4p3__7_7_7_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_7_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -13275,20 +13275,20 @@ define void @s_shuffle_v4p3_v4p3__7_7_7_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_7_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -13327,20 +13327,20 @@ define void @s_shuffle_v4p3_v4p3__7_7_7_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_7_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -13379,20 +13379,20 @@ define void @s_shuffle_v4p3_v4p3__7_7_7_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_7_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -13450,19 +13450,19 @@ define void @s_shuffle_v4p3_v4p3__u_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__u_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x ptr addrspace(3)> %shuf) @@ -13520,20 +13520,20 @@ define void @s_shuffle_v4p3_v4p3__1_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__1_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x ptr addrspace(3)> %shuf) @@ -13571,20 +13571,20 @@ define void @s_shuffle_v4p3_v4p3__2_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__2_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x ptr addrspace(3)> %shuf) @@ -13622,20 +13622,20 @@ define void @s_shuffle_v4p3_v4p3__3_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__3_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__3_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x ptr addrspace(3)> %shuf) @@ -13671,19 +13671,19 @@ define void @s_shuffle_v4p3_v4p3__4_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__4_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__4_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x ptr addrspace(3)> %shuf) @@ -13727,23 +13727,23 @@ define void @s_shuffle_v4p3_v4p3__5_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__5_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s5 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__5_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s5 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -13788,23 +13788,23 @@ define void @s_shuffle_v4p3_v4p3__6_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__6_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s6 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__6_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s6 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -13849,23 +13849,23 @@ define void @s_shuffle_v4p3_v4p3__7_0_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_0_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -13908,22 +13908,22 @@ define void @s_shuffle_v4p3_v4p3__7_u_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_u_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_u_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -13968,23 +13968,23 @@ define void @s_shuffle_v4p3_v4p3__7_1_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_1_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_1_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -14029,23 +14029,23 @@ define void @s_shuffle_v4p3_v4p3__7_2_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_2_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_2_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -14090,23 +14090,23 @@ define void @s_shuffle_v4p3_v4p3__7_3_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_3_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -14151,23 +14151,23 @@ define void @s_shuffle_v4p3_v4p3__7_4_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_4_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_4_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -14210,22 +14210,22 @@ define void @s_shuffle_v4p3_v4p3__7_5_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_5_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_5_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -14270,23 +14270,23 @@ define void @s_shuffle_v4p3_v4p3__7_6_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_6_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_6_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -14331,23 +14331,23 @@ define void @s_shuffle_v4p3_v4p3__7_7_0_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_0_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -14390,22 +14390,22 @@ define void @s_shuffle_v4p3_v4p3__7_7_u_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_u_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_u_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -14450,23 +14450,23 @@ define void @s_shuffle_v4p3_v4p3__7_7_1_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_1_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_1_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -14511,23 +14511,23 @@ define void @s_shuffle_v4p3_v4p3__7_7_2_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_2_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -14572,23 +14572,23 @@ define void @s_shuffle_v4p3_v4p3__7_7_3_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_3_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_3_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -14633,23 +14633,23 @@ define void @s_shuffle_v4p3_v4p3__7_7_4_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_4_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_4_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -14694,23 +14694,23 @@ define void @s_shuffle_v4p3_v4p3__7_7_5_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_5_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_5_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -14753,22 +14753,22 @@ define void @s_shuffle_v4p3_v4p3__7_7_6_0() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_6_0: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_6_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -14928,22 +14928,22 @@ define void @s_shuffle_v4p3_v4p3__5_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__5_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__5_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -14986,22 +14986,22 @@ define void @s_shuffle_v4p3_v4p3__6_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__6_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__6_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -15044,22 +15044,22 @@ define void @s_shuffle_v4p3_v4p3__7_1_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_1_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -15102,22 +15102,22 @@ define void @s_shuffle_v4p3_v4p3__7_u_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_u_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -15162,23 +15162,23 @@ define void @s_shuffle_v4p3_v4p3__7_0_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_0_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -15223,23 +15223,23 @@ define void @s_shuffle_v4p3_v4p3__7_2_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_2_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -15284,23 +15284,23 @@ define void @s_shuffle_v4p3_v4p3__7_3_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_3_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -15345,23 +15345,23 @@ define void @s_shuffle_v4p3_v4p3__7_4_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_4_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s4 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s4 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -15404,22 +15404,22 @@ define void @s_shuffle_v4p3_v4p3__7_5_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_5_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -15464,23 +15464,23 @@ define void @s_shuffle_v4p3_v4p3__7_6_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_6_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s6 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_6_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s6 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -15525,23 +15525,23 @@ define void @s_shuffle_v4p3_v4p3__7_7_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_1_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -15584,22 +15584,22 @@ define void @s_shuffle_v4p3_v4p3__7_7_u_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_u_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -15644,23 +15644,23 @@ define void @s_shuffle_v4p3_v4p3__7_7_0_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_0_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -15705,23 +15705,23 @@ define void @s_shuffle_v4p3_v4p3__7_7_2_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_2_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -15766,23 +15766,23 @@ define void @s_shuffle_v4p3_v4p3__7_7_3_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_3_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -15827,23 +15827,23 @@ define void @s_shuffle_v4p3_v4p3__7_7_4_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_4_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -15888,23 +15888,23 @@ define void @s_shuffle_v4p3_v4p3__7_7_5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_5_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -15947,22 +15947,22 @@ define void @s_shuffle_v4p3_v4p3__7_7_6_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_6_1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -16122,22 +16122,22 @@ define void @s_shuffle_v4p3_v4p3__5_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__5_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__5_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -16180,22 +16180,22 @@ define void @s_shuffle_v4p3_v4p3__6_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__6_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__6_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -16238,22 +16238,22 @@ define void @s_shuffle_v4p3_v4p3__7_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_2_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -16294,21 +16294,21 @@ define void @s_shuffle_v4p3_v4p3__7_u_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_u_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -16353,23 +16353,23 @@ define void @s_shuffle_v4p3_v4p3__7_0_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_0_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -16410,21 +16410,21 @@ define void @s_shuffle_v4p3_v4p3__7_1_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_1_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -16467,22 +16467,22 @@ define void @s_shuffle_v4p3_v4p3__7_3_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_3_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -16525,22 +16525,22 @@ define void @s_shuffle_v4p3_v4p3__7_4_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_4_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -16583,22 +16583,22 @@ define void @s_shuffle_v4p3_v4p3__7_5_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_5_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -16641,22 +16641,22 @@ define void @s_shuffle_v4p3_v4p3__7_6_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_6_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_6_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -16699,22 +16699,22 @@ define void @s_shuffle_v4p3_v4p3__7_7_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_2_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -16757,22 +16757,22 @@ define void @s_shuffle_v4p3_v4p3__7_7_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_u_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -16817,23 +16817,23 @@ define void @s_shuffle_v4p3_v4p3__7_7_0_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_0_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -16878,23 +16878,23 @@ define void @s_shuffle_v4p3_v4p3__7_7_1_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_1_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -16939,23 +16939,23 @@ define void @s_shuffle_v4p3_v4p3__7_7_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_3_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -17000,23 +17000,23 @@ define void @s_shuffle_v4p3_v4p3__7_7_4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_4_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -17061,23 +17061,23 @@ define void @s_shuffle_v4p3_v4p3__7_7_5_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_5_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -17120,22 +17120,22 @@ define void @s_shuffle_v4p3_v4p3__7_7_6_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_6_2: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -17295,22 +17295,22 @@ define void @s_shuffle_v4p3_v4p3__5_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__5_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__5_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -17353,22 +17353,22 @@ define void @s_shuffle_v4p3_v4p3__6_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__6_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__6_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -17411,22 +17411,22 @@ define void @s_shuffle_v4p3_v4p3__7_3_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_3_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -17467,21 +17467,21 @@ define void @s_shuffle_v4p3_v4p3__7_u_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_u_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -17526,23 +17526,23 @@ define void @s_shuffle_v4p3_v4p3__7_0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_0_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -17583,21 +17583,21 @@ define void @s_shuffle_v4p3_v4p3__7_1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_1_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -17640,22 +17640,22 @@ define void @s_shuffle_v4p3_v4p3__7_2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_2_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -17698,22 +17698,22 @@ define void @s_shuffle_v4p3_v4p3__7_4_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_4_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_4_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -17756,22 +17756,22 @@ define void @s_shuffle_v4p3_v4p3__7_5_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_5_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_5_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -17814,22 +17814,22 @@ define void @s_shuffle_v4p3_v4p3__7_6_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_6_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_6_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -17872,22 +17872,22 @@ define void @s_shuffle_v4p3_v4p3__7_7_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_3_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -17928,21 +17928,21 @@ define void @s_shuffle_v4p3_v4p3__7_7_u_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_u_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -17987,23 +17987,23 @@ define void @s_shuffle_v4p3_v4p3__7_7_0_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_0_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -18048,23 +18048,23 @@ define void @s_shuffle_v4p3_v4p3__7_7_1_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_1_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -18105,21 +18105,21 @@ define void @s_shuffle_v4p3_v4p3__7_7_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -18162,22 +18162,22 @@ define void @s_shuffle_v4p3_v4p3__7_7_4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_4_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_4_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -18220,22 +18220,22 @@ define void @s_shuffle_v4p3_v4p3__7_7_5_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_5_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -18278,22 +18278,22 @@ define void @s_shuffle_v4p3_v4p3__7_7_6_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_6_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_6_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -18338,17 +18338,17 @@ define void @s_shuffle_v4p3_v4p3__0_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__0_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__0_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x ptr addrspace(3)> %shuf) @@ -18380,17 +18380,17 @@ define void @s_shuffle_v4p3_v4p3__1_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__1_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__1_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x ptr addrspace(3)> %shuf) @@ -18422,17 +18422,17 @@ define void @s_shuffle_v4p3_v4p3__2_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__2_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__2_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x ptr addrspace(3)> %shuf) @@ -18464,17 +18464,17 @@ define void @s_shuffle_v4p3_v4p3__3_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__3_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__3_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x ptr addrspace(3)> %shuf) @@ -18526,20 +18526,20 @@ define void @s_shuffle_v4p3_v4p3__5_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__5_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__5_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -18578,20 +18578,20 @@ define void @s_shuffle_v4p3_v4p3__6_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__6_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__6_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -18630,20 +18630,20 @@ define void @s_shuffle_v4p3_v4p3__7_4_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_4_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -18680,19 +18680,19 @@ define void @s_shuffle_v4p3_v4p3__7_u_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_u_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -18737,23 +18737,23 @@ define void @s_shuffle_v4p3_v4p3__7_0_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_0_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_0_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -18796,22 +18796,22 @@ define void @s_shuffle_v4p3_v4p3__7_1_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_1_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_1_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -18856,23 +18856,23 @@ define void @s_shuffle_v4p3_v4p3__7_2_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_2_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -18917,23 +18917,23 @@ define void @s_shuffle_v4p3_v4p3__7_3_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_3_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s4 -; GFX940-NEXT: s_mov_b32 s11, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s4 +; GFX942-NEXT: s_mov_b32 s11, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -18972,20 +18972,20 @@ define void @s_shuffle_v4p3_v4p3__7_5_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_5_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -19024,20 +19024,20 @@ define void @s_shuffle_v4p3_v4p3__7_6_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_6_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_6_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -19076,20 +19076,20 @@ define void @s_shuffle_v4p3_v4p3__7_7_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_4_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -19126,19 +19126,19 @@ define void @s_shuffle_v4p3_v4p3__7_7_u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_u_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -19183,23 +19183,23 @@ define void @s_shuffle_v4p3_v4p3__7_7_0_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_0_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -19244,23 +19244,23 @@ define void @s_shuffle_v4p3_v4p3__7_7_1_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_1_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -19303,22 +19303,22 @@ define void @s_shuffle_v4p3_v4p3__7_7_2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_2_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -19363,23 +19363,23 @@ define void @s_shuffle_v4p3_v4p3__7_7_3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_3_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s4 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s4 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -19418,20 +19418,20 @@ define void @s_shuffle_v4p3_v4p3__7_7_5_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_5_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -19470,20 +19470,20 @@ define void @s_shuffle_v4p3_v4p3__7_7_6_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_6_4: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -19546,22 +19546,22 @@ define void @s_shuffle_v4p3_v4p3__0_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__0_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s1 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__0_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s1 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -19604,22 +19604,22 @@ define void @s_shuffle_v4p3_v4p3__1_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__1_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__1_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -19662,22 +19662,22 @@ define void @s_shuffle_v4p3_v4p3__2_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__2_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__2_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -19720,22 +19720,22 @@ define void @s_shuffle_v4p3_v4p3__3_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__3_5_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s9 -; GFX940-NEXT: s_mov_b32 s11, s9 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__3_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s9 +; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -19855,19 +19855,19 @@ define void @s_shuffle_v4p3_v4p3__7_u_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_u_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -19912,23 +19912,23 @@ define void @s_shuffle_v4p3_v4p3__7_0_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_0_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_0_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -19971,22 +19971,22 @@ define void @s_shuffle_v4p3_v4p3__7_1_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_1_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_1_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -20031,23 +20031,23 @@ define void @s_shuffle_v4p3_v4p3__7_2_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_2_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_2_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -20092,23 +20092,23 @@ define void @s_shuffle_v4p3_v4p3__7_3_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_3_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s5 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s5 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -20147,20 +20147,20 @@ define void @s_shuffle_v4p3_v4p3__7_4_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_4_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -20199,20 +20199,20 @@ define void @s_shuffle_v4p3_v4p3__7_6_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_6_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_6_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -20251,20 +20251,20 @@ define void @s_shuffle_v4p3_v4p3__7_7_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_5_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -20301,19 +20301,19 @@ define void @s_shuffle_v4p3_v4p3__7_7_u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_u_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -20358,23 +20358,23 @@ define void @s_shuffle_v4p3_v4p3__7_7_0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_0_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -20419,23 +20419,23 @@ define void @s_shuffle_v4p3_v4p3__7_7_1_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_1_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_1_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -20478,22 +20478,22 @@ define void @s_shuffle_v4p3_v4p3__7_7_2_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_2_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -20538,23 +20538,23 @@ define void @s_shuffle_v4p3_v4p3__7_7_3_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_3_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s5 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -20593,20 +20593,20 @@ define void @s_shuffle_v4p3_v4p3__7_7_4_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_4_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -20645,20 +20645,20 @@ define void @s_shuffle_v4p3_v4p3__7_7_6_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_6_5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -20721,22 +20721,22 @@ define void @s_shuffle_v4p3_v4p3__0_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__0_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__0_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -20779,22 +20779,22 @@ define void @s_shuffle_v4p3_v4p3__1_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__1_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__1_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -20837,22 +20837,22 @@ define void @s_shuffle_v4p3_v4p3__2_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__2_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__2_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -20895,22 +20895,22 @@ define void @s_shuffle_v4p3_v4p3__3_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__3_6_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s10 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__3_6_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s10 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -21056,22 +21056,22 @@ define void @s_shuffle_v4p3_v4p3__7_0_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_0_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_0_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -21114,22 +21114,22 @@ define void @s_shuffle_v4p3_v4p3__7_1_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_1_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_1_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -21172,22 +21172,22 @@ define void @s_shuffle_v4p3_v4p3__7_2_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_2_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_2_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -21230,22 +21230,22 @@ define void @s_shuffle_v4p3_v4p3__7_3_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_3_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s11, s10 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_3_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s11, s10 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -21284,20 +21284,20 @@ define void @s_shuffle_v4p3_v4p3__7_4_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_4_6_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s2 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_4_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -21375,19 +21375,19 @@ define void @s_shuffle_v4p3_v4p3__7_7_u_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_u_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_u_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -21432,23 +21432,23 @@ define void @s_shuffle_v4p3_v4p3__7_7_0_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_0_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -21493,23 +21493,23 @@ define void @s_shuffle_v4p3_v4p3__7_7_1_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_1_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_1_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -21552,22 +21552,22 @@ define void @s_shuffle_v4p3_v4p3__7_7_2_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_2_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -21612,23 +21612,23 @@ define void @s_shuffle_v4p3_v4p3__7_7_3_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_3_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[4:7] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s7 -; GFX940-NEXT: s_mov_b32 s9, s7 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s6 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_3_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s7 +; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s6 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -21667,20 +21667,20 @@ define void @s_shuffle_v4p3_v4p3__7_7_4_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_4_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -21719,20 +21719,20 @@ define void @s_shuffle_v4p3_v4p3__7_7_5_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_5_6: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s2 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s2 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -21795,22 +21795,22 @@ define void @s_shuffle_v4p3_v4p3__0_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__0_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__0_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -21853,22 +21853,22 @@ define void @s_shuffle_v4p3_v4p3__1_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__1_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s1 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__1_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s1 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -21911,22 +21911,22 @@ define void @s_shuffle_v4p3_v4p3__2_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__2_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s2 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__2_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s2 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -21969,22 +21969,22 @@ define void @s_shuffle_v4p3_v4p3__3_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__3_7_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__3_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -22109,22 +22109,22 @@ define void @s_shuffle_v4p3_v4p3__7_0_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_0_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_0_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -22167,22 +22167,22 @@ define void @s_shuffle_v4p3_v4p3__7_1_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_1_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_1_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -22225,22 +22225,22 @@ define void @s_shuffle_v4p3_v4p3__7_2_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_2_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s2 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_2_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s2 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -22283,22 +22283,22 @@ define void @s_shuffle_v4p3_v4p3__7_3_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_3_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s11 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_3_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s11 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -22337,20 +22337,20 @@ define void @s_shuffle_v4p3_v4p3__7_4_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_4_7_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s0 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_4_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s0 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -22454,22 +22454,22 @@ define void @s_shuffle_v4p3_v4p3__7_7_0_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_0_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -22512,22 +22512,22 @@ define void @s_shuffle_v4p3_v4p3__7_7_1_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_1_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_1_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -22570,22 +22570,22 @@ define void @s_shuffle_v4p3_v4p3__7_7_2_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_2_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -22628,22 +22628,22 @@ define void @s_shuffle_v4p3_v4p3__7_7_3_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_3_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s11 -; GFX940-NEXT: s_mov_b32 s9, s11 -; GFX940-NEXT: s_mov_b32 s10, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_3_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s11 +; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b32 s10, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -22682,20 +22682,20 @@ define void @s_shuffle_v4p3_v4p3__7_7_4_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_4_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s0 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> @@ -22734,20 +22734,20 @@ define void @s_shuffle_v4p3_v4p3__7_7_5_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: s_shuffle_v4p3_v4p3__7_7_5_7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; def s[0:3] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_mov_b32 s8, s3 -; GFX940-NEXT: s_mov_b32 s9, s3 -; GFX940-NEXT: s_mov_b32 s10, s1 -; GFX940-NEXT: s_mov_b32 s11, s3 -; GFX940-NEXT: ;;#ASMSTART -; GFX940-NEXT: ; use s[8:11] -; GFX940-NEXT: ;;#ASMEND -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: s_shuffle_v4p3_v4p3__7_7_5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:3] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s8, s3 +; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b32 s10, s1 +; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %vec1 = call <4 x ptr addrspace(3)> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr addrspace(3)> %vec0, <4 x ptr addrspace(3)> %vec1, <4 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/smfmac_no_agprs.ll b/llvm/test/CodeGen/AMDGPU/smfmac_no_agprs.ll index ae166212fe79d..be50ad5041692 100644 --- a/llvm/test/CodeGen/AMDGPU/smfmac_no_agprs.ll +++ b/llvm/test/CodeGen/AMDGPU/smfmac_no_agprs.ll @@ -1,31 +1,31 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX942 %s define protected amdgpu_kernel void @test(ptr addrspace(1) %in, ptr addrspace(1) %out) #0 { -; GFX940-LABEL: test: -; GFX940: ; %bb.0: ; %entry -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GFX940-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-NEXT: v_mov_b32_e32 v2, v0 -; GFX940-NEXT: v_mov_b32_e32 v3, v0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 -; GFX940-NEXT: v_mov_b64_e32 v[10:11], v[2:3] -; GFX940-NEXT: v_mov_b64_e32 v[8:9], v[0:1] -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v12, s4 -; GFX940-NEXT: v_mov_b32_e32 v13, s5 -; GFX940-NEXT: v_mov_b32_e32 v4, s6 -; GFX940-NEXT: v_mov_b32_e32 v5, s7 -; GFX940-NEXT: v_mov_b32_e32 v6, s7 -; GFX940-NEXT: v_mov_b32_e32 v7, s7 -; GFX940-NEXT: s_nop 1 -; GFX940-NEXT: v_smfmac_i32_16x16x64_i8 v[8:11], v[12:13], v[4:7], v13 -; GFX940-NEXT: s_nop 6 -; GFX940-NEXT: global_store_dword v0, v11, s[2:3] offset:12 sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: test: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 +; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[2:3] +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[0:1] +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v12, s4 +; GFX942-NEXT: v_mov_b32_e32 v13, s5 +; GFX942-NEXT: v_mov_b32_e32 v4, s6 +; GFX942-NEXT: v_mov_b32_e32 v5, s7 +; GFX942-NEXT: v_mov_b32_e32 v6, s7 +; GFX942-NEXT: v_mov_b32_e32 v7, s7 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_smfmac_i32_16x16x64_i8 v[8:11], v[12:13], v[4:7], v13 +; GFX942-NEXT: s_nop 6 +; GFX942-NEXT: global_store_dword v0, v11, s[2:3] offset:12 +; GFX942-NEXT: s_endpgm entry: %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %in, i64 0 %arrayidx1 = getelementptr inbounds i32, ptr addrspace(1) %in, i64 1 diff --git a/llvm/test/CodeGen/AMDGPU/uniform-select.ll b/llvm/test/CodeGen/AMDGPU/uniform-select.ll index 67f760320e78e..f001bf0d5e498 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-select.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-select.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940 %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX942 %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX1030 %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX1100 %s @@ -53,53 +53,53 @@ define amdgpu_kernel void @test_insert_extract(i32 %p, i32 %q) { ; GFX90A-NEXT: ; %bb.2: ; %DummyReturnBlock ; GFX90A-NEXT: s_endpgm ; -; GFX940-LABEL: test_insert_extract: -; GFX940: ; %bb.0: ; %entry -; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX940-NEXT: s_mov_b32 s2, 0 -; GFX940-NEXT: s_and_b64 vcc, exec, -1 -; GFX940-NEXT: s_mov_b32 s3, 0 -; GFX940-NEXT: s_mov_b32 s4, 0 -; GFX940-NEXT: s_mov_b32 s5, 0 -; GFX940-NEXT: s_mov_b32 s6, 0 -; GFX940-NEXT: .LBB0_1: ; %for.body -; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_cmp_eq_u32 s1, 1 -; GFX940-NEXT: s_cselect_b64 s[8:9], -1, 0 -; GFX940-NEXT: s_and_b64 s[8:9], s[8:9], exec -; GFX940-NEXT: s_cselect_b32 s7, s4, s3 -; GFX940-NEXT: s_cmp_eq_u32 s1, 2 -; GFX940-NEXT: s_cselect_b64 s[8:9], -1, 0 -; GFX940-NEXT: s_and_b64 s[8:9], s[8:9], exec -; GFX940-NEXT: s_cselect_b32 s7, s5, s7 -; GFX940-NEXT: s_cmp_eq_u32 s1, 3 -; GFX940-NEXT: s_cselect_b64 s[8:9], -1, 0 -; GFX940-NEXT: s_and_b64 s[8:9], s[8:9], exec -; GFX940-NEXT: s_cselect_b32 s7, s6, s7 -; GFX940-NEXT: s_or_b32 s7, s7, s0 -; GFX940-NEXT: s_cmp_eq_u32 s1, 1 -; GFX940-NEXT: s_cselect_b64 s[8:9], -1, 0 -; GFX940-NEXT: s_and_b64 s[10:11], s[8:9], exec -; GFX940-NEXT: s_cselect_b32 s4, s7, s4 -; GFX940-NEXT: s_cmp_eq_u32 s1, 3 -; GFX940-NEXT: s_cselect_b64 s[10:11], -1, 0 -; GFX940-NEXT: s_and_b64 s[12:13], s[10:11], exec -; GFX940-NEXT: s_cselect_b32 s6, s7, s6 -; GFX940-NEXT: s_cmp_eq_u32 s1, 2 -; GFX940-NEXT: s_cselect_b64 s[12:13], -1, 0 -; GFX940-NEXT: s_and_b64 s[14:15], s[12:13], exec -; GFX940-NEXT: s_cselect_b32 s5, s7, s5 -; GFX940-NEXT: s_cmp_eq_u32 s1, 0 -; GFX940-NEXT: s_cselect_b32 s3, s7, s3 -; GFX940-NEXT: s_or_b64 s[8:9], s[12:13], s[8:9] -; GFX940-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9] -; GFX940-NEXT: s_and_b64 s[8:9], s[8:9], exec -; GFX940-NEXT: s_cselect_b32 s2, 0, s2 -; GFX940-NEXT: s_mov_b64 vcc, vcc -; GFX940-NEXT: s_cbranch_vccnz .LBB0_1 -; GFX940-NEXT: ; %bb.2: ; %DummyReturnBlock -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: test_insert_extract: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942-NEXT: s_mov_b32 s2, 0 +; GFX942-NEXT: s_and_b64 vcc, exec, -1 +; GFX942-NEXT: s_mov_b32 s3, 0 +; GFX942-NEXT: s_mov_b32 s4, 0 +; GFX942-NEXT: s_mov_b32 s5, 0 +; GFX942-NEXT: s_mov_b32 s6, 0 +; GFX942-NEXT: .LBB0_1: ; %for.body +; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_cmp_eq_u32 s1, 1 +; GFX942-NEXT: s_cselect_b64 s[8:9], -1, 0 +; GFX942-NEXT: s_and_b64 s[8:9], s[8:9], exec +; GFX942-NEXT: s_cselect_b32 s7, s4, s3 +; GFX942-NEXT: s_cmp_eq_u32 s1, 2 +; GFX942-NEXT: s_cselect_b64 s[8:9], -1, 0 +; GFX942-NEXT: s_and_b64 s[8:9], s[8:9], exec +; GFX942-NEXT: s_cselect_b32 s7, s5, s7 +; GFX942-NEXT: s_cmp_eq_u32 s1, 3 +; GFX942-NEXT: s_cselect_b64 s[8:9], -1, 0 +; GFX942-NEXT: s_and_b64 s[8:9], s[8:9], exec +; GFX942-NEXT: s_cselect_b32 s7, s6, s7 +; GFX942-NEXT: s_or_b32 s7, s7, s0 +; GFX942-NEXT: s_cmp_eq_u32 s1, 1 +; GFX942-NEXT: s_cselect_b64 s[8:9], -1, 0 +; GFX942-NEXT: s_and_b64 s[10:11], s[8:9], exec +; GFX942-NEXT: s_cselect_b32 s4, s7, s4 +; GFX942-NEXT: s_cmp_eq_u32 s1, 3 +; GFX942-NEXT: s_cselect_b64 s[10:11], -1, 0 +; GFX942-NEXT: s_and_b64 s[12:13], s[10:11], exec +; GFX942-NEXT: s_cselect_b32 s6, s7, s6 +; GFX942-NEXT: s_cmp_eq_u32 s1, 2 +; GFX942-NEXT: s_cselect_b64 s[12:13], -1, 0 +; GFX942-NEXT: s_and_b64 s[14:15], s[12:13], exec +; GFX942-NEXT: s_cselect_b32 s5, s7, s5 +; GFX942-NEXT: s_cmp_eq_u32 s1, 0 +; GFX942-NEXT: s_cselect_b32 s3, s7, s3 +; GFX942-NEXT: s_or_b64 s[8:9], s[12:13], s[8:9] +; GFX942-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9] +; GFX942-NEXT: s_and_b64 s[8:9], s[8:9], exec +; GFX942-NEXT: s_cselect_b32 s2, 0, s2 +; GFX942-NEXT: s_mov_b64 vcc, vcc +; GFX942-NEXT: s_cbranch_vccnz .LBB0_1 +; GFX942-NEXT: ; %bb.2: ; %DummyReturnBlock +; GFX942-NEXT: s_endpgm ; ; GFX1030-LABEL: test_insert_extract: ; GFX1030: ; %bb.0: ; %entry diff --git a/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll b/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll index b08586efe2f21..db5f0ad42a677 100644 --- a/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll +++ b/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll @@ -2,7 +2,7 @@ ; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s ; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s ; RUN: not --crash llc -O0 -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefixes=GFX90A %s -; RUN: not --crash llc -O0 -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefixes=GFX940 %s +; RUN: not --crash llc -O0 -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefixes=GFX942 %s ; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1030 %s ; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100 %s @@ -11,7 +11,7 @@ ; GFX90A: LLVM ERROR: requested image instruction is not supported on this GPU -; GFX940: LLVM ERROR: requested image instruction is not supported on this GPU +; GFX942: LLVM ERROR: requested image instruction is not supported on this GPU ; GFX1030-LABEL: image_sample_test: ; GFX1030: image_sample_lz diff --git a/llvm/test/CodeGen/AMDGPU/v_mov_b64_expansion.mir b/llvm/test/CodeGen/AMDGPU/v_mov_b64_expansion.mir index 7c82d185c2c92..7a8feff59c1fe 100644 --- a/llvm/test/CodeGen/AMDGPU/v_mov_b64_expansion.mir +++ b/llvm/test/CodeGen/AMDGPU/v_mov_b64_expansion.mir @@ -1,12 +1,12 @@ # RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass postrapseudos -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX900 %s # RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass postrapseudos -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX90A %s -# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -run-pass postrapseudos -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX940 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -run-pass postrapseudos -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX942 %s # GCN-LABEL: name: v_mov_b64_from_vgpr # GFX900: $vgpr0 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit-def $vgpr0_vgpr1 # GFX900: $vgpr1 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1 # GFX90A: $vgpr0_vgpr1 = V_PK_MOV_B32 8, $vgpr2_vgpr3, 12, $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec -# GFX940: $vgpr0_vgpr1 = V_MOV_B64_e32 $vgpr2_vgpr3, implicit $exec +# GFX942: $vgpr0_vgpr1 = V_MOV_B64_e32 $vgpr2_vgpr3, implicit $exec name: v_mov_b64_from_vgpr body: | bb.0: @@ -17,7 +17,7 @@ body: | # GFX900: $vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr0_vgpr1 # GFX900: $vgpr1 = V_MOV_B32_e32 $sgpr3, implicit $exec, implicit-def $vgpr0_vgpr1 # GFX90A: $vgpr0_vgpr1 = V_PK_MOV_B32 8, $sgpr2_sgpr3, 12, $sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec -# GFX940: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr2_sgpr3, implicit $exec +# GFX942: $vgpr0_vgpr1 = V_MOV_B64_e32 $sgpr2_sgpr3, implicit $exec name: v_mov_b64_from_sgpr body: | bb.0: @@ -29,7 +29,7 @@ body: | # GFX900: $vgpr1 = V_MOV_B32_e32 -1, implicit $exec, implicit-def $vgpr0_vgpr1 # GFX90A: $vgpr0 = V_MOV_B32_e32 -2, implicit $exec, implicit-def $vgpr0_vgpr1 # GFX90A: $vgpr1 = V_MOV_B32_e32 -1, implicit $exec, implicit-def $vgpr0_vgpr1 -# GFX940: $vgpr0_vgpr1 = V_MOV_B64_e32 -2, implicit $exec +# GFX942: $vgpr0_vgpr1 = V_MOV_B64_e32 -2, implicit $exec name: v_mov_b64_from_sext_inline_imm body: | bb.0: @@ -67,7 +67,7 @@ body: | # GFX900: $vgpr0 = V_MOV_B32_e32 -1, implicit $exec, implicit-def $vgpr0_vgpr1 # GFX900: $vgpr1 = V_MOV_B32_e32 -1, implicit $exec, implicit-def $vgpr0_vgpr1 # GFX90A: $vgpr0_vgpr1 = V_PK_MOV_B32 8, -1, 8, -1, 0, 0, 0, 0, 0, implicit $exec -# GFX940: $vgpr0_vgpr1 = V_MOV_B64_e32 -1, implicit $exec +# GFX942: $vgpr0_vgpr1 = V_MOV_B64_e32 -1, implicit $exec name: v_mov_b64_from_same_sext_inline_imm body: | bb.0: @@ -78,7 +78,7 @@ body: | # GFX900: $vgpr0 = V_MOV_B32_e32 1065353216, implicit $exec, implicit-def $vgpr0_vgpr1 # GFX900: $vgpr1 = V_MOV_B32_e32 1065353216, implicit $exec, implicit-def $vgpr0_vgpr1 # GFX90A: $vgpr0_vgpr1 = V_PK_MOV_B32 8, 1065353216, 8, 1065353216, 0, 0, 0, 0, 0, implicit $exec -# GFX940: $vgpr0_vgpr1 = V_PK_MOV_B32 8, 1065353216, 8, 1065353216, 0, 0, 0, 0, 0, implicit $exec +# GFX942: $vgpr0_vgpr1 = V_PK_MOV_B32 8, 1065353216, 8, 1065353216, 0, 0, 0, 0, 0, implicit $exec name: v_mov_b64_from_same_fp_inline_imm body: | bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll b/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll index b85bd4c634668..e9ae1ebe4572b 100644 --- a/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll +++ b/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GX900 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX940 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX942 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s @@ -43,16 +43,16 @@ define <4 x half> @shuffle_v4f16_234u(ptr addrspace(1) %arg0, ptr addrspace(1) % ; GX900-NEXT: v_mov_b32_e32 v1, v4 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v4f16_234u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v4, v[0:1], off offset:4 -; GFX940-NEXT: global_load_dwordx2 v[6:7], v[2:3], off -; GFX940-NEXT: s_waitcnt vmcnt(1) -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v4f16_234u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v4, v[0:1], off offset:4 +; GFX942-NEXT: global_load_dwordx2 v[6:7], v[2:3], off +; GFX942-NEXT: s_waitcnt vmcnt(1) +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v4f16_234u: ; GFX10: ; %bb.0: @@ -114,14 +114,14 @@ define <4 x half> @shuffle_v4f16_u3u1(ptr addrspace(1) %arg0, ptr addrspace(1) % ; GX900-NEXT: v_mov_b32_e32 v0, v2 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v4f16_u3u1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[2:3], v[0:1], off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v4f16_u3u1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx2 v[2:3], v[0:1], off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v4f16_u3u1: ; GFX10: ; %bb.0: @@ -183,16 +183,16 @@ define <4 x half> @shuffle_v4f16_3u6u(ptr addrspace(1) %arg0, ptr addrspace(1) % ; GX900-NEXT: v_mov_b32_e32 v1, v4 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v4f16_3u6u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v5, v[0:1], off offset:4 -; GFX940-NEXT: global_load_dword v4, v[2:3], off offset:4 -; GFX940-NEXT: s_waitcnt vmcnt(1) -; GFX940-NEXT: v_alignbit_b32 v0, s0, v5, 16 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v4f16_3u6u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v5, v[0:1], off offset:4 +; GFX942-NEXT: global_load_dword v4, v[2:3], off offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(1) +; GFX942-NEXT: v_alignbit_b32 v0, s0, v5, 16 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v4f16_3u6u: ; GFX10: ; %bb.0: @@ -232,16 +232,16 @@ define <4 x half> @shuffle_v4f16_3uu7(ptr addrspace(1) %arg0, ptr addrspace(1) % ; GX900-NEXT: v_mov_b32_e32 v1, v4 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v4f16_3uu7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v5, v[0:1], off offset:4 -; GFX940-NEXT: global_load_dword v4, v[2:3], off offset:4 -; GFX940-NEXT: s_waitcnt vmcnt(1) -; GFX940-NEXT: v_alignbit_b32 v0, s0, v5, 16 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v4f16_3uu7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v5, v[0:1], off offset:4 +; GFX942-NEXT: global_load_dword v4, v[2:3], off offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(1) +; GFX942-NEXT: v_alignbit_b32 v0, s0, v5, 16 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v4f16_3uu7: ; GFX10: ; %bb.0: @@ -281,16 +281,16 @@ define <4 x half> @shuffle_v4f16_35u5(ptr addrspace(1) %arg0, ptr addrspace(1) % ; GX900-NEXT: v_mov_b32_e32 v1, v4 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v4f16_35u5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v5, v[0:1], off offset:4 -; GFX940-NEXT: global_load_dword v4, v[2:3], off -; GFX940-NEXT: s_mov_b32 s0, 0x7060302 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_perm_b32 v0, v4, v5, s0 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v4f16_35u5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v5, v[0:1], off offset:4 +; GFX942-NEXT: global_load_dword v4, v[2:3], off +; GFX942-NEXT: s_mov_b32 s0, 0x7060302 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_perm_b32 v0, v4, v5, s0 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v4f16_35u5: ; GFX10: ; %bb.0: @@ -329,17 +329,17 @@ define <4 x half> @shuffle_v4f16_357u(ptr addrspace(1) %arg0, ptr addrspace(1) % ; GX900-NEXT: v_perm_b32 v0, v4, v6, s4 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v4f16_357u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[4:5], v[2:3], off -; GFX940-NEXT: global_load_dword v6, v[0:1], off offset:4 -; GFX940-NEXT: s_mov_b32 s0, 0x7060302 -; GFX940-NEXT: s_waitcnt vmcnt(1) -; GFX940-NEXT: v_alignbit_b32 v1, s0, v5, 16 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_perm_b32 v0, v4, v6, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v4f16_357u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx2 v[4:5], v[2:3], off +; GFX942-NEXT: global_load_dword v6, v[0:1], off offset:4 +; GFX942-NEXT: s_mov_b32 s0, 0x7060302 +; GFX942-NEXT: s_waitcnt vmcnt(1) +; GFX942-NEXT: v_alignbit_b32 v1, s0, v5, 16 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_perm_b32 v0, v4, v6, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v4f16_357u: ; GFX10: ; %bb.0: @@ -506,14 +506,14 @@ define <4 x half> @shuffle_v4f16_2301(ptr addrspace(1) %arg0, ptr addrspace(1) % ; GX900-NEXT: v_mov_b32_e32 v0, v2 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v4f16_2301: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[2:3], v[0:1], off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v4f16_2301: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx2 v[2:3], v[0:1], off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v4f16_2301: ; GFX10: ; %bb.0: @@ -856,14 +856,14 @@ define <4 x half> @shuffle_v4f16_6745(ptr addrspace(1) %arg0, ptr addrspace(1) % ; GX900-NEXT: v_mov_b32_e32 v0, v2 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v4f16_6745: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[2:3], v[2:3], off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v4f16_6745: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx2 v[2:3], v[2:3], off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v4f16_6745: ; GFX10: ; %bb.0: @@ -928,16 +928,16 @@ define <4 x half> @shuffle_v4f16_2356(ptr addrspace(1) %arg0, ptr addrspace(1) % ; GX900-NEXT: v_mov_b32_e32 v0, v4 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v4f16_2356: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[6:7], v[2:3], off -; GFX940-NEXT: global_load_dword v4, v[0:1], off offset:4 -; GFX940-NEXT: s_waitcnt vmcnt(1) -; GFX940-NEXT: v_alignbit_b32 v1, v7, v6, 16 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v4f16_2356: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx2 v[6:7], v[2:3], off +; GFX942-NEXT: global_load_dword v4, v[0:1], off offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(1) +; GFX942-NEXT: v_alignbit_b32 v1, v7, v6, 16 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v4f16_2356: ; GFX10: ; %bb.0: @@ -977,16 +977,16 @@ define <4 x half> @shuffle_v4f16_5623(ptr addrspace(1) %arg0, ptr addrspace(1) % ; GX900-NEXT: v_mov_b32_e32 v1, v4 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v4f16_5623: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[6:7], v[2:3], off -; GFX940-NEXT: global_load_dword v4, v[0:1], off offset:4 -; GFX940-NEXT: s_waitcnt vmcnt(1) -; GFX940-NEXT: v_alignbit_b32 v0, v7, v6, 16 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v4f16_5623: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx2 v[6:7], v[2:3], off +; GFX942-NEXT: global_load_dword v4, v[0:1], off offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(1) +; GFX942-NEXT: v_alignbit_b32 v0, v7, v6, 16 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v4f16_5623: ; GFX10: ; %bb.0: @@ -1105,17 +1105,17 @@ define <4 x half> @shuffle_v4f16_5734(ptr addrspace(1) %arg0, ptr addrspace(1) % ; GX900-NEXT: v_alignbit_b32 v1, v4, v6, 16 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v4f16_5734: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[4:5], v[2:3], off -; GFX940-NEXT: global_load_dword v6, v[0:1], off offset:4 -; GFX940-NEXT: s_mov_b32 s0, 0x7060302 -; GFX940-NEXT: s_waitcnt vmcnt(1) -; GFX940-NEXT: v_perm_b32 v0, v5, v4, s0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_alignbit_b32 v1, v4, v6, 16 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v4f16_5734: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx2 v[4:5], v[2:3], off +; GFX942-NEXT: global_load_dword v6, v[0:1], off offset:4 +; GFX942-NEXT: s_mov_b32 s0, 0x7060302 +; GFX942-NEXT: s_waitcnt vmcnt(1) +; GFX942-NEXT: v_perm_b32 v0, v5, v4, s0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_alignbit_b32 v1, v4, v6, 16 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v4f16_5734: ; GFX10: ; %bb.0: @@ -1156,16 +1156,16 @@ define <4 x i16> @shuffle_v4i16_2356(ptr addrspace(1) %arg0, ptr addrspace(1) %a ; GX900-NEXT: v_mov_b32_e32 v0, v4 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v4i16_2356: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[6:7], v[2:3], off -; GFX940-NEXT: global_load_dword v4, v[0:1], off offset:4 -; GFX940-NEXT: s_waitcnt vmcnt(1) -; GFX940-NEXT: v_alignbit_b32 v1, v7, v6, 16 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v4i16_2356: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx2 v[6:7], v[2:3], off +; GFX942-NEXT: global_load_dword v4, v[0:1], off offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(1) +; GFX942-NEXT: v_alignbit_b32 v1, v7, v6, 16 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v4i16_2356: ; GFX10: ; %bb.0: @@ -1240,15 +1240,15 @@ define <4 x half> @shuffle_v4f16_0000(ptr addrspace(1) %arg0, ptr addrspace(1) % ; GX900-NEXT: v_mov_b32_e32 v1, v0 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v4f16_0000: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[0:1], v[0:1], off -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v4f16_0000: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v4f16_0000: ; GFX10: ; %bb.0: @@ -1320,16 +1320,16 @@ define <4 x half> @shuffle_v4f16_1100(ptr addrspace(1) %arg0, ptr addrspace(1) % ; GX900-NEXT: v_perm_b32 v1, v1, v1, s5 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v4f16_1100: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[2:3], v[0:1], off -; GFX940-NEXT: s_mov_b32 s0, 0x7060302 -; GFX940-NEXT: s_mov_b32 s1, 0x5040100 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_perm_b32 v0, v2, v2, s0 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v4f16_1100: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx2 v[2:3], v[0:1], off +; GFX942-NEXT: s_mov_b32 s0, 0x7060302 +; GFX942-NEXT: s_mov_b32 s1, 0x5040100 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_perm_b32 v0, v2, v2, s0 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v4f16_1100: ; GFX10: ; %bb.0: @@ -1366,16 +1366,16 @@ define <4 x half> @shuffle_v4f16_6161(ptr addrspace(1) %arg0, ptr addrspace(1) % ; GX900-NEXT: v_mov_b32_e32 v1, v0 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v4f16_6161: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v4, v[0:1], off -; GFX940-NEXT: global_load_dword v5, v[2:3], off offset:4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_bfi_b32 v0, s0, v5, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v4f16_6161: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v4, v[0:1], off +; GFX942-NEXT: global_load_dword v5, v[2:3], off offset:4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_bfi_b32 v0, s0, v5, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v4f16_6161: ; GFX10: ; %bb.0: @@ -1413,14 +1413,14 @@ define <4 x half> @shuffle_v4f16_2333(ptr addrspace(1) %arg0, ptr addrspace(1) % ; GX900-NEXT: v_perm_b32 v1, v0, v0, s4 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v4f16_2333: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v0, v[0:1], off offset:4 -; GFX940-NEXT: s_mov_b32 s0, 0x7060302 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v4f16_2333: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v0, v[0:1], off offset:4 +; GFX942-NEXT: s_mov_b32 s0, 0x7060302 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v4f16_2333: ; GFX10: ; %bb.0: @@ -1453,14 +1453,14 @@ define <4 x half> @shuffle_v4f16_6667(ptr addrspace(1) %arg0, ptr addrspace(1) % ; GX900-NEXT: v_perm_b32 v1, v0, v0, s4 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v4f16_6667: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v0, v[0:1], off offset:4 -; GFX940-NEXT: s_mov_b32 s0, 0x7060302 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v4f16_6667: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v0, v[0:1], off offset:4 +; GFX942-NEXT: s_mov_b32 s0, 0x7060302 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v4f16_6667: ; GFX10: ; %bb.0: @@ -1626,16 +1626,16 @@ define <4 x half> @shuffle_v8f16_13_14_2_3(ptr addrspace(1) %arg0, ptr addrspace ; GX900-NEXT: v_mov_b32_e32 v1, v4 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v8f16_13_14_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[6:7], v[2:3], off offset:8 -; GFX940-NEXT: global_load_dword v4, v[0:1], off offset:4 -; GFX940-NEXT: s_waitcnt vmcnt(1) -; GFX940-NEXT: v_alignbit_b32 v0, v7, v6, 16 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v8f16_13_14_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx2 v[6:7], v[2:3], off offset:8 +; GFX942-NEXT: global_load_dword v4, v[0:1], off offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(1) +; GFX942-NEXT: v_alignbit_b32 v0, v7, v6, 16 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v8f16_13_14_2_3: ; GFX10: ; %bb.0: @@ -1673,14 +1673,14 @@ define <4 x half> @shuffle_v3f16_0122(ptr addrspace(1) %arg0, ptr addrspace(1) % ; GX900-NEXT: v_perm_b32 v1, v1, v1, s4 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v3f16_0122: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[0:1], v[0:1], off -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v3f16_0122: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v3f16_0122: ; GFX10: ; %bb.0: @@ -1749,20 +1749,20 @@ define <6 x half> @shuffle_v6f16_452367(ptr addrspace(1) %arg0, ptr addrspace(1) ; GX900-NEXT: v_mov_b32_e32 v2, v7 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v6f16_452367: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: global_load_dwordx3 v[0:2], v[6:7], off -; GFX940-NEXT: global_load_dword v3, v[4:5], off -; GFX940-NEXT: s_waitcnt vmcnt(1) -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v6f16_452367: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: global_load_dwordx3 v[0:2], v[6:7], off +; GFX942-NEXT: global_load_dword v3, v[4:5], off +; GFX942-NEXT: s_waitcnt vmcnt(1) +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v6f16_452367: ; GFX10: ; %bb.0: @@ -1814,23 +1814,23 @@ define amdgpu_kernel void @fma_shuffle_v2f16(ptr addrspace(1) nocapture readonly ; GX900-NEXT: global_store_dwordx2 v6, v[0:1], s[4:5] ; GX900-NEXT: s_endpgm ; -; GFX940-LABEL: fma_shuffle_v2f16: -; GFX940: ; %bb.0: ; %entry -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10 -; GFX940-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v6, 3, v0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[0:1], v6, s[0:1] -; GFX940-NEXT: global_load_dwordx2 v[2:3], v6, s[2:3] -; GFX940-NEXT: global_load_dwordx2 v[4:5], v6, s[6:7] -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_pk_fma_f16 v4, v0, v2, v4 op_sel_hi:[0,1,1] -; GFX940-NEXT: v_pk_fma_f16 v2, v1, v2, v5 op_sel_hi:[0,1,1] -; GFX940-NEXT: v_pk_fma_f16 v0, v0, v3, v4 op_sel:[1,0,0] -; GFX940-NEXT: v_pk_fma_f16 v1, v1, v3, v2 op_sel:[1,0,0] -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[6:7] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: fma_shuffle_v2f16: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10 +; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v6, 3, v0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx2 v[0:1], v6, s[0:1] +; GFX942-NEXT: global_load_dwordx2 v[2:3], v6, s[2:3] +; GFX942-NEXT: global_load_dwordx2 v[4:5], v6, s[6:7] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_pk_fma_f16 v4, v0, v2, v4 op_sel_hi:[0,1,1] +; GFX942-NEXT: v_pk_fma_f16 v2, v1, v2, v5 op_sel_hi:[0,1,1] +; GFX942-NEXT: v_pk_fma_f16 v0, v0, v3, v4 op_sel:[1,0,0] +; GFX942-NEXT: v_pk_fma_f16 v1, v1, v3, v2 op_sel:[1,0,0] +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[6:7] +; GFX942-NEXT: s_endpgm ; ; GFX10-LABEL: fma_shuffle_v2f16: ; GFX10: ; %bb.0: ; %entry @@ -1915,16 +1915,16 @@ define <4 x half> @shuffle_v4f16_0456(ptr addrspace(1) %arg0, ptr addrspace(1) % ; GX900-NEXT: v_alignbit_b32 v1, v6, v5, 16 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v4f16_0456: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[4:5], v[0:1], off -; GFX940-NEXT: global_load_dwordx2 v[6:7], v[2:3], off -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_perm_b32 v0, v6, v4, s0 -; GFX940-NEXT: v_alignbit_b32 v1, v7, v6, 16 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v4f16_0456: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx2 v[4:5], v[0:1], off +; GFX942-NEXT: global_load_dwordx2 v[6:7], v[2:3], off +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_perm_b32 v0, v6, v4, s0 +; GFX942-NEXT: v_alignbit_b32 v1, v7, v6, 16 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v4f16_0456: ; GFX10: ; %bb.0: @@ -1968,17 +1968,17 @@ define amdgpu_kernel void @shuffle_scalar_load_v8i32_0123(ptr addrspace(4) %in, ; GX900-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] ; GX900-NEXT: s_endpgm ; -; GFX940-LABEL: shuffle_scalar_load_v8i32_0123: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GFX940-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[6:7] -; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: shuffle_scalar_load_v8i32_0123: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[4:5] +; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[6:7] +; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] +; GFX942-NEXT: s_endpgm ; ; GFX10-LABEL: shuffle_scalar_load_v8i32_0123: ; GFX10: ; %bb.0: @@ -2022,15 +2022,15 @@ define <2 x half> @low16bits_v2f16(ptr addrspace(1) %x0, ptr addrspace(1) %x1) { ; GX900-NEXT: v_perm_b32 v0, v5, v4, s4 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: low16bits_v2f16: -; GFX940: ; %bb.0: ; %entry -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v4, v[0:1], off -; GFX940-NEXT: global_load_dword v5, v[2:3], off -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_perm_b32 v0, v5, v4, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: low16bits_v2f16: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v4, v[0:1], off +; GFX942-NEXT: global_load_dword v5, v[2:3], off +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_perm_b32 v0, v5, v4, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: low16bits_v2f16: ; GFX10: ; %bb.0: ; %entry @@ -2068,15 +2068,15 @@ define <2 x half> @hi16bits_v2f16(ptr addrspace(1) %x0, ptr addrspace(1) %x1) { ; GX900-NEXT: v_perm_b32 v0, v5, v4, s4 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: hi16bits_v2f16: -; GFX940: ; %bb.0: ; %entry -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v4, v[0:1], off -; GFX940-NEXT: global_load_dword v5, v[2:3], off -; GFX940-NEXT: s_mov_b32 s0, 0x7060302 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_perm_b32 v0, v5, v4, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: hi16bits_v2f16: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v4, v[0:1], off +; GFX942-NEXT: global_load_dword v5, v[2:3], off +; GFX942-NEXT: s_mov_b32 s0, 0x7060302 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_perm_b32 v0, v5, v4, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: hi16bits_v2f16: ; GFX10: ; %bb.0: ; %entry @@ -2114,15 +2114,15 @@ define <2 x half> @low16hi16bits_v2f16(ptr addrspace(1) %x0, ptr addrspace(1) %x ; GX900-NEXT: v_bfi_b32 v0, s4, v4, v5 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: low16hi16bits_v2f16: -; GFX940: ; %bb.0: ; %entry -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v4, v[0:1], off -; GFX940-NEXT: global_load_dword v5, v[2:3], off -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_bfi_b32 v0, s0, v4, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: low16hi16bits_v2f16: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v4, v[0:1], off +; GFX942-NEXT: global_load_dword v5, v[2:3], off +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_bfi_b32 v0, s0, v4, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: low16hi16bits_v2f16: ; GFX10: ; %bb.0: ; %entry @@ -2195,15 +2195,15 @@ define <2 x i16> @i16_low16bits(ptr addrspace(1) %x0, ptr addrspace(1) %x1) { ; GX900-NEXT: v_perm_b32 v0, v5, v4, s4 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: i16_low16bits: -; GFX940: ; %bb.0: ; %entry -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v4, v[0:1], off -; GFX940-NEXT: global_load_dword v5, v[2:3], off -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_perm_b32 v0, v5, v4, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: i16_low16bits: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v4, v[0:1], off +; GFX942-NEXT: global_load_dword v5, v[2:3], off +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_perm_b32 v0, v5, v4, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: i16_low16bits: ; GFX10: ; %bb.0: ; %entry @@ -2241,15 +2241,15 @@ define <2 x i16> @i16_low16hi16bits(ptr addrspace(1) %x0, ptr addrspace(1) %x1) ; GX900-NEXT: v_bfi_b32 v0, s4, v4, v5 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: i16_low16hi16bits: -; GFX940: ; %bb.0: ; %entry -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v4, v[0:1], off -; GFX940-NEXT: global_load_dword v5, v[2:3], off -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_bfi_b32 v0, s0, v4, v5 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: i16_low16hi16bits: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v4, v[0:1], off +; GFX942-NEXT: global_load_dword v5, v[2:3], off +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_bfi_b32 v0, s0, v4, v5 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: i16_low16hi16bits: ; GFX10: ; %bb.0: ; %entry @@ -2322,15 +2322,15 @@ define <2 x i16> @i16_hi16bits(ptr addrspace(1) %x0, ptr addrspace(1) %x1) { ; GX900-NEXT: v_perm_b32 v0, v5, v4, s4 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: i16_hi16bits: -; GFX940: ; %bb.0: ; %entry -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v4, v[0:1], off -; GFX940-NEXT: global_load_dword v5, v[2:3], off -; GFX940-NEXT: s_mov_b32 s0, 0x7060302 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_perm_b32 v0, v5, v4, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: i16_hi16bits: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v4, v[0:1], off +; GFX942-NEXT: global_load_dword v5, v[2:3], off +; GFX942-NEXT: s_mov_b32 s0, 0x7060302 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_perm_b32 v0, v5, v4, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: i16_hi16bits: ; GFX10: ; %bb.0: ; %entry @@ -2422,25 +2422,15 @@ entry: } define void @shuffle_v8f16_concat(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1, ptr addrspace(1) %out) { -; GX900-LABEL: shuffle_v8f16_concat: -; GX900: ; %bb.0: -; GX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GX900-NEXT: global_load_dwordx2 v[6:7], v[0:1], off -; GX900-NEXT: global_load_dwordx2 v[8:9], v[2:3], off -; GX900-NEXT: s_waitcnt vmcnt(0) -; GX900-NEXT: global_store_dwordx4 v[4:5], v[6:9], off -; GX900-NEXT: s_waitcnt vmcnt(0) -; GX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX940-LABEL: shuffle_v8f16_concat: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[6:7], v[0:1], off -; GFX940-NEXT: global_load_dwordx2 v[8:9], v[2:3], off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dwordx4 v[4:5], v[6:9], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9-LABEL: shuffle_v8f16_concat: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx2 v[6:7], v[0:1], off +; GFX9-NEXT: global_load_dwordx2 v[8:9], v[2:3], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dwordx4 v[4:5], v[6:9], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v8f16_concat: ; GFX10: ; %bb.0: @@ -2467,29 +2457,17 @@ define void @shuffle_v8f16_concat(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1 } define void @shuffle_v16f16_concat(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1, ptr addrspace(1) %out) { -; GX900-LABEL: shuffle_v16f16_concat: -; GX900: ; %bb.0: -; GX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GX900-NEXT: global_load_dwordx4 v[6:9], v[2:3], off -; GX900-NEXT: global_load_dwordx4 v[10:13], v[0:1], off -; GX900-NEXT: s_waitcnt vmcnt(1) -; GX900-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:16 -; GX900-NEXT: s_waitcnt vmcnt(1) -; GX900-NEXT: global_store_dwordx4 v[4:5], v[10:13], off -; GX900-NEXT: s_waitcnt vmcnt(0) -; GX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX940-LABEL: shuffle_v16f16_concat: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx4 v[6:9], v[2:3], off -; GFX940-NEXT: global_load_dwordx4 v[10:13], v[0:1], off -; GFX940-NEXT: s_waitcnt vmcnt(1) -; GFX940-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:16 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(1) -; GFX940-NEXT: global_store_dwordx4 v[4:5], v[10:13], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9-LABEL: shuffle_v16f16_concat: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[6:9], v[2:3], off +; GFX9-NEXT: global_load_dwordx4 v[10:13], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(1) +; GFX9-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:16 +; GFX9-NEXT: s_waitcnt vmcnt(1) +; GFX9-NEXT: global_store_dwordx4 v[4:5], v[10:13], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v16f16_concat: ; GFX10: ; %bb.0: @@ -2520,41 +2498,23 @@ define void @shuffle_v16f16_concat(ptr addrspace(1) %arg0, ptr addrspace(1) %arg } define void @shuffle_v32f16_concat(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1, ptr addrspace(1) %out) { -; GX900-LABEL: shuffle_v32f16_concat: -; GX900: ; %bb.0: -; GX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GX900-NEXT: global_load_dwordx4 v[6:9], v[2:3], off -; GX900-NEXT: global_load_dwordx4 v[10:13], v[2:3], off offset:16 -; GX900-NEXT: global_load_dwordx4 v[14:17], v[0:1], off -; GX900-NEXT: global_load_dwordx4 v[18:21], v[0:1], off offset:16 -; GX900-NEXT: s_waitcnt vmcnt(3) -; GX900-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:32 -; GX900-NEXT: s_waitcnt vmcnt(3) -; GX900-NEXT: global_store_dwordx4 v[4:5], v[10:13], off offset:48 -; GX900-NEXT: s_waitcnt vmcnt(3) -; GX900-NEXT: global_store_dwordx4 v[4:5], v[14:17], off -; GX900-NEXT: s_waitcnt vmcnt(3) -; GX900-NEXT: global_store_dwordx4 v[4:5], v[18:21], off offset:16 -; GX900-NEXT: s_waitcnt vmcnt(0) -; GX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX940-LABEL: shuffle_v32f16_concat: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx4 v[6:9], v[2:3], off -; GFX940-NEXT: global_load_dwordx4 v[10:13], v[2:3], off offset:16 -; GFX940-NEXT: global_load_dwordx4 v[14:17], v[0:1], off -; GFX940-NEXT: global_load_dwordx4 v[18:21], v[0:1], off offset:16 -; GFX940-NEXT: s_waitcnt vmcnt(3) -; GFX940-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:32 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(3) -; GFX940-NEXT: global_store_dwordx4 v[4:5], v[10:13], off offset:48 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(3) -; GFX940-NEXT: global_store_dwordx4 v[4:5], v[14:17], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(3) -; GFX940-NEXT: global_store_dwordx4 v[4:5], v[18:21], off offset:16 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9-LABEL: shuffle_v32f16_concat: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[6:9], v[2:3], off +; GFX9-NEXT: global_load_dwordx4 v[10:13], v[2:3], off offset:16 +; GFX9-NEXT: global_load_dwordx4 v[14:17], v[0:1], off +; GFX9-NEXT: global_load_dwordx4 v[18:21], v[0:1], off offset:16 +; GFX9-NEXT: s_waitcnt vmcnt(3) +; GFX9-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:32 +; GFX9-NEXT: s_waitcnt vmcnt(3) +; GFX9-NEXT: global_store_dwordx4 v[4:5], v[10:13], off offset:48 +; GFX9-NEXT: s_waitcnt vmcnt(3) +; GFX9-NEXT: global_store_dwordx4 v[4:5], v[14:17], off +; GFX9-NEXT: s_waitcnt vmcnt(3) +; GFX9-NEXT: global_store_dwordx4 v[4:5], v[18:21], off offset:16 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v32f16_concat: ; GFX10: ; %bb.0: @@ -2601,25 +2561,15 @@ define void @shuffle_v32f16_concat(ptr addrspace(1) %arg0, ptr addrspace(1) %arg } define void @shuffle_v8i16_concat(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1, ptr addrspace(1) %out) { -; GX900-LABEL: shuffle_v8i16_concat: -; GX900: ; %bb.0: -; GX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GX900-NEXT: global_load_dwordx2 v[6:7], v[0:1], off -; GX900-NEXT: global_load_dwordx2 v[8:9], v[2:3], off -; GX900-NEXT: s_waitcnt vmcnt(0) -; GX900-NEXT: global_store_dwordx4 v[4:5], v[6:9], off -; GX900-NEXT: s_waitcnt vmcnt(0) -; GX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX940-LABEL: shuffle_v8i16_concat: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[6:7], v[0:1], off -; GFX940-NEXT: global_load_dwordx2 v[8:9], v[2:3], off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dwordx4 v[4:5], v[6:9], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9-LABEL: shuffle_v8i16_concat: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx2 v[6:7], v[0:1], off +; GFX9-NEXT: global_load_dwordx2 v[8:9], v[2:3], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dwordx4 v[4:5], v[6:9], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v8i16_concat: ; GFX10: ; %bb.0: @@ -2646,29 +2596,17 @@ define void @shuffle_v8i16_concat(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1 } define void @shuffle_v16i16_concat(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1, ptr addrspace(1) %out) { -; GX900-LABEL: shuffle_v16i16_concat: -; GX900: ; %bb.0: -; GX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GX900-NEXT: global_load_dwordx4 v[6:9], v[2:3], off -; GX900-NEXT: global_load_dwordx4 v[10:13], v[0:1], off -; GX900-NEXT: s_waitcnt vmcnt(1) -; GX900-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:16 -; GX900-NEXT: s_waitcnt vmcnt(1) -; GX900-NEXT: global_store_dwordx4 v[4:5], v[10:13], off -; GX900-NEXT: s_waitcnt vmcnt(0) -; GX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX940-LABEL: shuffle_v16i16_concat: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx4 v[6:9], v[2:3], off -; GFX940-NEXT: global_load_dwordx4 v[10:13], v[0:1], off -; GFX940-NEXT: s_waitcnt vmcnt(1) -; GFX940-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:16 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(1) -; GFX940-NEXT: global_store_dwordx4 v[4:5], v[10:13], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9-LABEL: shuffle_v16i16_concat: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[6:9], v[2:3], off +; GFX9-NEXT: global_load_dwordx4 v[10:13], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(1) +; GFX9-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:16 +; GFX9-NEXT: s_waitcnt vmcnt(1) +; GFX9-NEXT: global_store_dwordx4 v[4:5], v[10:13], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v16i16_concat: ; GFX10: ; %bb.0: @@ -2699,41 +2637,23 @@ define void @shuffle_v16i16_concat(ptr addrspace(1) %arg0, ptr addrspace(1) %arg } define void @shuffle_v32i16_concat(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1, ptr addrspace(1) %out) { -; GX900-LABEL: shuffle_v32i16_concat: -; GX900: ; %bb.0: -; GX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GX900-NEXT: global_load_dwordx4 v[6:9], v[2:3], off -; GX900-NEXT: global_load_dwordx4 v[10:13], v[2:3], off offset:16 -; GX900-NEXT: global_load_dwordx4 v[14:17], v[0:1], off -; GX900-NEXT: global_load_dwordx4 v[18:21], v[0:1], off offset:16 -; GX900-NEXT: s_waitcnt vmcnt(3) -; GX900-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:32 -; GX900-NEXT: s_waitcnt vmcnt(3) -; GX900-NEXT: global_store_dwordx4 v[4:5], v[10:13], off offset:48 -; GX900-NEXT: s_waitcnt vmcnt(3) -; GX900-NEXT: global_store_dwordx4 v[4:5], v[14:17], off -; GX900-NEXT: s_waitcnt vmcnt(3) -; GX900-NEXT: global_store_dwordx4 v[4:5], v[18:21], off offset:16 -; GX900-NEXT: s_waitcnt vmcnt(0) -; GX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX940-LABEL: shuffle_v32i16_concat: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx4 v[6:9], v[2:3], off -; GFX940-NEXT: global_load_dwordx4 v[10:13], v[2:3], off offset:16 -; GFX940-NEXT: global_load_dwordx4 v[14:17], v[0:1], off -; GFX940-NEXT: global_load_dwordx4 v[18:21], v[0:1], off offset:16 -; GFX940-NEXT: s_waitcnt vmcnt(3) -; GFX940-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:32 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(3) -; GFX940-NEXT: global_store_dwordx4 v[4:5], v[10:13], off offset:48 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(3) -; GFX940-NEXT: global_store_dwordx4 v[4:5], v[14:17], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(3) -; GFX940-NEXT: global_store_dwordx4 v[4:5], v[18:21], off offset:16 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9-LABEL: shuffle_v32i16_concat: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[6:9], v[2:3], off +; GFX9-NEXT: global_load_dwordx4 v[10:13], v[2:3], off offset:16 +; GFX9-NEXT: global_load_dwordx4 v[14:17], v[0:1], off +; GFX9-NEXT: global_load_dwordx4 v[18:21], v[0:1], off offset:16 +; GFX9-NEXT: s_waitcnt vmcnt(3) +; GFX9-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:32 +; GFX9-NEXT: s_waitcnt vmcnt(3) +; GFX9-NEXT: global_store_dwordx4 v[4:5], v[10:13], off offset:48 +; GFX9-NEXT: s_waitcnt vmcnt(3) +; GFX9-NEXT: global_store_dwordx4 v[4:5], v[14:17], off +; GFX9-NEXT: s_waitcnt vmcnt(3) +; GFX9-NEXT: global_store_dwordx4 v[4:5], v[18:21], off offset:16 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v32i16_concat: ; GFX10: ; %bb.0: @@ -2791,17 +2711,17 @@ define void @shuffle_v4i8_concat(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1, ; GX900-NEXT: s_waitcnt vmcnt(0) ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v4i8_concat: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_ushort v6, v[0:1], off -; GFX940-NEXT: global_load_ushort v7, v[2:3], off -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_perm_b32 v0, v7, v6, s0 -; GFX940-NEXT: global_store_dword v[4:5], v0, off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v4i8_concat: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_ushort v6, v[0:1], off +; GFX942-NEXT: global_load_ushort v7, v[2:3], off +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_perm_b32 v0, v7, v6, s0 +; GFX942-NEXT: global_store_dword v[4:5], v0, off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v4i8_concat: ; GFX10: ; %bb.0: @@ -2828,25 +2748,15 @@ define void @shuffle_v4i8_concat(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1, } define void @shuffle_v8i8_concat(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1, ptr addrspace(1) %out) { -; GX900-LABEL: shuffle_v8i8_concat: -; GX900: ; %bb.0: -; GX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GX900-NEXT: global_load_dword v6, v[0:1], off -; GX900-NEXT: global_load_dword v7, v[2:3], off -; GX900-NEXT: s_waitcnt vmcnt(0) -; GX900-NEXT: global_store_dwordx2 v[4:5], v[6:7], off -; GX900-NEXT: s_waitcnt vmcnt(0) -; GX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX940-LABEL: shuffle_v8i8_concat: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v6, v[0:1], off -; GFX940-NEXT: global_load_dword v7, v[2:3], off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dwordx2 v[4:5], v[6:7], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9-LABEL: shuffle_v8i8_concat: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dword v6, v[0:1], off +; GFX9-NEXT: global_load_dword v7, v[2:3], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dwordx2 v[4:5], v[6:7], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v8i8_concat: ; GFX10: ; %bb.0: @@ -2873,25 +2783,15 @@ define void @shuffle_v8i8_concat(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1, } define void @shuffle_v16i8_concat(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1, ptr addrspace(1) %out) { -; GX900-LABEL: shuffle_v16i8_concat: -; GX900: ; %bb.0: -; GX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GX900-NEXT: global_load_dwordx2 v[6:7], v[0:1], off -; GX900-NEXT: global_load_dwordx2 v[8:9], v[2:3], off -; GX900-NEXT: s_waitcnt vmcnt(0) -; GX900-NEXT: global_store_dwordx4 v[4:5], v[6:9], off -; GX900-NEXT: s_waitcnt vmcnt(0) -; GX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX940-LABEL: shuffle_v16i8_concat: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[6:7], v[0:1], off -; GFX940-NEXT: global_load_dwordx2 v[8:9], v[2:3], off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dwordx4 v[4:5], v[6:9], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9-LABEL: shuffle_v16i8_concat: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx2 v[6:7], v[0:1], off +; GFX9-NEXT: global_load_dwordx2 v[8:9], v[2:3], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dwordx4 v[4:5], v[6:9], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v16i8_concat: ; GFX10: ; %bb.0: @@ -2918,29 +2818,17 @@ define void @shuffle_v16i8_concat(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1 } define void @shuffle_v32i8_concat(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1, ptr addrspace(1) %out) { -; GX900-LABEL: shuffle_v32i8_concat: -; GX900: ; %bb.0: -; GX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GX900-NEXT: global_load_dwordx4 v[6:9], v[2:3], off -; GX900-NEXT: global_load_dwordx4 v[10:13], v[0:1], off -; GX900-NEXT: s_waitcnt vmcnt(1) -; GX900-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:16 -; GX900-NEXT: s_waitcnt vmcnt(1) -; GX900-NEXT: global_store_dwordx4 v[4:5], v[10:13], off -; GX900-NEXT: s_waitcnt vmcnt(0) -; GX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX940-LABEL: shuffle_v32i8_concat: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx4 v[6:9], v[2:3], off -; GFX940-NEXT: global_load_dwordx4 v[10:13], v[0:1], off -; GFX940-NEXT: s_waitcnt vmcnt(1) -; GFX940-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:16 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(1) -; GFX940-NEXT: global_store_dwordx4 v[4:5], v[10:13], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9-LABEL: shuffle_v32i8_concat: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[6:9], v[2:3], off +; GFX9-NEXT: global_load_dwordx4 v[10:13], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(1) +; GFX9-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:16 +; GFX9-NEXT: s_waitcnt vmcnt(1) +; GFX9-NEXT: global_store_dwordx4 v[4:5], v[10:13], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v32i8_concat: ; GFX10: ; %bb.0: @@ -2971,25 +2859,15 @@ define void @shuffle_v32i8_concat(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1 } define void @shuffle_v4i32_concat(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1, ptr addrspace(1) %out) { -; GX900-LABEL: shuffle_v4i32_concat: -; GX900: ; %bb.0: -; GX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GX900-NEXT: global_load_dwordx2 v[6:7], v[0:1], off -; GX900-NEXT: global_load_dwordx2 v[8:9], v[2:3], off -; GX900-NEXT: s_waitcnt vmcnt(0) -; GX900-NEXT: global_store_dwordx4 v[4:5], v[6:9], off -; GX900-NEXT: s_waitcnt vmcnt(0) -; GX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX940-LABEL: shuffle_v4i32_concat: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[6:7], v[0:1], off -; GFX940-NEXT: global_load_dwordx2 v[8:9], v[2:3], off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dwordx4 v[4:5], v[6:9], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9-LABEL: shuffle_v4i32_concat: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx2 v[6:7], v[0:1], off +; GFX9-NEXT: global_load_dwordx2 v[8:9], v[2:3], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dwordx4 v[4:5], v[6:9], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v4i32_concat: ; GFX10: ; %bb.0: @@ -3016,29 +2894,17 @@ define void @shuffle_v4i32_concat(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1 } define void @shuffle_v8i32_concat(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1, ptr addrspace(1) %out) { -; GX900-LABEL: shuffle_v8i32_concat: -; GX900: ; %bb.0: -; GX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GX900-NEXT: global_load_dwordx4 v[6:9], v[2:3], off -; GX900-NEXT: global_load_dwordx4 v[10:13], v[0:1], off -; GX900-NEXT: s_waitcnt vmcnt(1) -; GX900-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:16 -; GX900-NEXT: s_waitcnt vmcnt(1) -; GX900-NEXT: global_store_dwordx4 v[4:5], v[10:13], off -; GX900-NEXT: s_waitcnt vmcnt(0) -; GX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX940-LABEL: shuffle_v8i32_concat: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx4 v[6:9], v[2:3], off -; GFX940-NEXT: global_load_dwordx4 v[10:13], v[0:1], off -; GFX940-NEXT: s_waitcnt vmcnt(1) -; GFX940-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:16 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(1) -; GFX940-NEXT: global_store_dwordx4 v[4:5], v[10:13], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9-LABEL: shuffle_v8i32_concat: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[6:9], v[2:3], off +; GFX9-NEXT: global_load_dwordx4 v[10:13], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(1) +; GFX9-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:16 +; GFX9-NEXT: s_waitcnt vmcnt(1) +; GFX9-NEXT: global_store_dwordx4 v[4:5], v[10:13], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v8i32_concat: ; GFX10: ; %bb.0: @@ -3069,41 +2935,23 @@ define void @shuffle_v8i32_concat(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1 } define void @shuffle_v16i32_concat(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1, ptr addrspace(1) %out) { -; GX900-LABEL: shuffle_v16i32_concat: -; GX900: ; %bb.0: -; GX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GX900-NEXT: global_load_dwordx4 v[6:9], v[2:3], off -; GX900-NEXT: global_load_dwordx4 v[10:13], v[2:3], off offset:16 -; GX900-NEXT: global_load_dwordx4 v[14:17], v[0:1], off -; GX900-NEXT: global_load_dwordx4 v[18:21], v[0:1], off offset:16 -; GX900-NEXT: s_waitcnt vmcnt(3) -; GX900-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:32 -; GX900-NEXT: s_waitcnt vmcnt(3) -; GX900-NEXT: global_store_dwordx4 v[4:5], v[10:13], off offset:48 -; GX900-NEXT: s_waitcnt vmcnt(3) -; GX900-NEXT: global_store_dwordx4 v[4:5], v[14:17], off -; GX900-NEXT: s_waitcnt vmcnt(3) -; GX900-NEXT: global_store_dwordx4 v[4:5], v[18:21], off offset:16 -; GX900-NEXT: s_waitcnt vmcnt(0) -; GX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX940-LABEL: shuffle_v16i32_concat: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx4 v[6:9], v[2:3], off -; GFX940-NEXT: global_load_dwordx4 v[10:13], v[2:3], off offset:16 -; GFX940-NEXT: global_load_dwordx4 v[14:17], v[0:1], off -; GFX940-NEXT: global_load_dwordx4 v[18:21], v[0:1], off offset:16 -; GFX940-NEXT: s_waitcnt vmcnt(3) -; GFX940-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:32 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(3) -; GFX940-NEXT: global_store_dwordx4 v[4:5], v[10:13], off offset:48 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(3) -; GFX940-NEXT: global_store_dwordx4 v[4:5], v[14:17], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(3) -; GFX940-NEXT: global_store_dwordx4 v[4:5], v[18:21], off offset:16 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9-LABEL: shuffle_v16i32_concat: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[6:9], v[2:3], off +; GFX9-NEXT: global_load_dwordx4 v[10:13], v[2:3], off offset:16 +; GFX9-NEXT: global_load_dwordx4 v[14:17], v[0:1], off +; GFX9-NEXT: global_load_dwordx4 v[18:21], v[0:1], off offset:16 +; GFX9-NEXT: s_waitcnt vmcnt(3) +; GFX9-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:32 +; GFX9-NEXT: s_waitcnt vmcnt(3) +; GFX9-NEXT: global_store_dwordx4 v[4:5], v[10:13], off offset:48 +; GFX9-NEXT: s_waitcnt vmcnt(3) +; GFX9-NEXT: global_store_dwordx4 v[4:5], v[14:17], off +; GFX9-NEXT: s_waitcnt vmcnt(3) +; GFX9-NEXT: global_store_dwordx4 v[4:5], v[18:21], off offset:16 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v16i32_concat: ; GFX10: ; %bb.0: @@ -3188,16 +3036,16 @@ define <4 x bfloat> @shuffle_v4bf16_234u(ptr addrspace(1) %arg0, ptr addrspace(1 ; GX900-NEXT: v_mov_b32_e32 v1, v4 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v4bf16_234u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v4, v[0:1], off offset:4 -; GFX940-NEXT: global_load_dwordx2 v[6:7], v[2:3], off -; GFX940-NEXT: s_waitcnt vmcnt(1) -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, v6 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v4bf16_234u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v4, v[0:1], off offset:4 +; GFX942-NEXT: global_load_dwordx2 v[6:7], v[2:3], off +; GFX942-NEXT: s_waitcnt vmcnt(1) +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, v6 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v4bf16_234u: ; GFX10: ; %bb.0: @@ -3259,14 +3107,14 @@ define <4 x bfloat> @shuffle_v4bf16_u3u1(ptr addrspace(1) %arg0, ptr addrspace(1 ; GX900-NEXT: v_mov_b32_e32 v0, v2 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v4bf16_u3u1: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[2:3], v[0:1], off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v4bf16_u3u1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx2 v[2:3], v[0:1], off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v4bf16_u3u1: ; GFX10: ; %bb.0: @@ -3328,16 +3176,16 @@ define <4 x bfloat> @shuffle_v4bf16_3u6u(ptr addrspace(1) %arg0, ptr addrspace(1 ; GX900-NEXT: v_mov_b32_e32 v1, v4 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v4bf16_3u6u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v5, v[0:1], off offset:4 -; GFX940-NEXT: global_load_dword v4, v[2:3], off offset:4 -; GFX940-NEXT: s_waitcnt vmcnt(1) -; GFX940-NEXT: v_alignbit_b32 v0, s0, v5, 16 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v4bf16_3u6u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v5, v[0:1], off offset:4 +; GFX942-NEXT: global_load_dword v4, v[2:3], off offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(1) +; GFX942-NEXT: v_alignbit_b32 v0, s0, v5, 16 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v4bf16_3u6u: ; GFX10: ; %bb.0: @@ -3377,16 +3225,16 @@ define <4 x bfloat> @shuffle_v4bf16_3uu7(ptr addrspace(1) %arg0, ptr addrspace(1 ; GX900-NEXT: v_mov_b32_e32 v1, v4 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v4bf16_3uu7: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v5, v[0:1], off offset:4 -; GFX940-NEXT: global_load_dword v4, v[2:3], off offset:4 -; GFX940-NEXT: s_waitcnt vmcnt(1) -; GFX940-NEXT: v_alignbit_b32 v0, s0, v5, 16 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v4bf16_3uu7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v5, v[0:1], off offset:4 +; GFX942-NEXT: global_load_dword v4, v[2:3], off offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(1) +; GFX942-NEXT: v_alignbit_b32 v0, s0, v5, 16 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v4bf16_3uu7: ; GFX10: ; %bb.0: @@ -3426,16 +3274,16 @@ define <4 x bfloat> @shuffle_v4bf16_35u5(ptr addrspace(1) %arg0, ptr addrspace(1 ; GX900-NEXT: v_mov_b32_e32 v1, v4 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v4bf16_35u5: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v5, v[0:1], off offset:4 -; GFX940-NEXT: global_load_dword v4, v[2:3], off -; GFX940-NEXT: s_mov_b32 s0, 0x7060302 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_perm_b32 v0, v4, v5, s0 -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v4bf16_35u5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v5, v[0:1], off offset:4 +; GFX942-NEXT: global_load_dword v4, v[2:3], off +; GFX942-NEXT: s_mov_b32 s0, 0x7060302 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_perm_b32 v0, v4, v5, s0 +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v4bf16_35u5: ; GFX10: ; %bb.0: @@ -3474,17 +3322,17 @@ define <4 x bfloat> @shuffle_v4bf16_357u(ptr addrspace(1) %arg0, ptr addrspace(1 ; GX900-NEXT: v_perm_b32 v0, v4, v6, s4 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v4bf16_357u: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[4:5], v[2:3], off -; GFX940-NEXT: global_load_dword v6, v[0:1], off offset:4 -; GFX940-NEXT: s_mov_b32 s0, 0x7060302 -; GFX940-NEXT: s_waitcnt vmcnt(1) -; GFX940-NEXT: v_alignbit_b32 v1, s0, v5, 16 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_perm_b32 v0, v4, v6, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v4bf16_357u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx2 v[4:5], v[2:3], off +; GFX942-NEXT: global_load_dword v6, v[0:1], off offset:4 +; GFX942-NEXT: s_mov_b32 s0, 0x7060302 +; GFX942-NEXT: s_waitcnt vmcnt(1) +; GFX942-NEXT: v_alignbit_b32 v1, s0, v5, 16 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_perm_b32 v0, v4, v6, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v4bf16_357u: ; GFX10: ; %bb.0: @@ -3651,14 +3499,14 @@ define <4 x bfloat> @shuffle_v4bf16_2301(ptr addrspace(1) %arg0, ptr addrspace(1 ; GX900-NEXT: v_mov_b32_e32 v0, v2 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v4bf16_2301: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[2:3], v[0:1], off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v4bf16_2301: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx2 v[2:3], v[0:1], off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v4bf16_2301: ; GFX10: ; %bb.0: @@ -4001,14 +3849,14 @@ define <4 x bfloat> @shuffle_v4bf16_6745(ptr addrspace(1) %arg0, ptr addrspace(1 ; GX900-NEXT: v_mov_b32_e32 v0, v2 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v4bf16_6745: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[2:3], v[2:3], off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, v3 -; GFX940-NEXT: v_mov_b32_e32 v1, v2 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v4bf16_6745: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx2 v[2:3], v[2:3], off +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, v3 +; GFX942-NEXT: v_mov_b32_e32 v1, v2 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v4bf16_6745: ; GFX10: ; %bb.0: @@ -4073,16 +3921,16 @@ define <4 x bfloat> @shuffle_v4bf16_2356(ptr addrspace(1) %arg0, ptr addrspace(1 ; GX900-NEXT: v_mov_b32_e32 v0, v4 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v4bf16_2356: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[6:7], v[2:3], off -; GFX940-NEXT: global_load_dword v4, v[0:1], off offset:4 -; GFX940-NEXT: s_waitcnt vmcnt(1) -; GFX940-NEXT: v_alignbit_b32 v1, v7, v6, 16 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v4bf16_2356: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx2 v[6:7], v[2:3], off +; GFX942-NEXT: global_load_dword v4, v[0:1], off offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(1) +; GFX942-NEXT: v_alignbit_b32 v1, v7, v6, 16 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v0, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v4bf16_2356: ; GFX10: ; %bb.0: @@ -4122,16 +3970,16 @@ define <4 x bfloat> @shuffle_v4bf16_5623(ptr addrspace(1) %arg0, ptr addrspace(1 ; GX900-NEXT: v_mov_b32_e32 v1, v4 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v4bf16_5623: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[6:7], v[2:3], off -; GFX940-NEXT: global_load_dword v4, v[0:1], off offset:4 -; GFX940-NEXT: s_waitcnt vmcnt(1) -; GFX940-NEXT: v_alignbit_b32 v0, v7, v6, 16 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v4bf16_5623: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx2 v[6:7], v[2:3], off +; GFX942-NEXT: global_load_dword v4, v[0:1], off offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(1) +; GFX942-NEXT: v_alignbit_b32 v0, v7, v6, 16 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v4bf16_5623: ; GFX10: ; %bb.0: @@ -4250,17 +4098,17 @@ define <4 x bfloat> @shuffle_v4bf16_5734(ptr addrspace(1) %arg0, ptr addrspace(1 ; GX900-NEXT: v_alignbit_b32 v1, v4, v6, 16 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v4bf16_5734: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[4:5], v[2:3], off -; GFX940-NEXT: global_load_dword v6, v[0:1], off offset:4 -; GFX940-NEXT: s_mov_b32 s0, 0x7060302 -; GFX940-NEXT: s_waitcnt vmcnt(1) -; GFX940-NEXT: v_perm_b32 v0, v5, v4, s0 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_alignbit_b32 v1, v4, v6, 16 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v4bf16_5734: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx2 v[4:5], v[2:3], off +; GFX942-NEXT: global_load_dword v6, v[0:1], off offset:4 +; GFX942-NEXT: s_mov_b32 s0, 0x7060302 +; GFX942-NEXT: s_waitcnt vmcnt(1) +; GFX942-NEXT: v_perm_b32 v0, v5, v4, s0 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_alignbit_b32 v1, v4, v6, 16 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v4bf16_5734: ; GFX10: ; %bb.0: @@ -4300,15 +4148,15 @@ define <4 x bfloat> @shuffle_v4bf16_0000(ptr addrspace(1) %arg0, ptr addrspace(1 ; GX900-NEXT: v_mov_b32_e32 v1, v0 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v4bf16_0000: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[0:1], v[0:1], off -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_perm_b32 v0, v0, v0, s0 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v4bf16_0000: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_perm_b32 v0, v0, v0, s0 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v4bf16_0000: ; GFX10: ; %bb.0: @@ -4380,16 +4228,16 @@ define <4 x bfloat> @shuffle_v4bf16_1100(ptr addrspace(1) %arg0, ptr addrspace(1 ; GX900-NEXT: v_perm_b32 v1, v1, v1, s5 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v4bf16_1100: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[2:3], v[0:1], off -; GFX940-NEXT: s_mov_b32 s0, 0x7060302 -; GFX940-NEXT: s_mov_b32 s1, 0x5040100 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_perm_b32 v0, v2, v2, s0 -; GFX940-NEXT: v_perm_b32 v1, v2, v2, s1 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v4bf16_1100: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx2 v[2:3], v[0:1], off +; GFX942-NEXT: s_mov_b32 s0, 0x7060302 +; GFX942-NEXT: s_mov_b32 s1, 0x5040100 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_perm_b32 v0, v2, v2, s0 +; GFX942-NEXT: v_perm_b32 v1, v2, v2, s1 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v4bf16_1100: ; GFX10: ; %bb.0: @@ -4426,16 +4274,16 @@ define <4 x bfloat> @shuffle_v4bf16_6161(ptr addrspace(1) %arg0, ptr addrspace(1 ; GX900-NEXT: v_mov_b32_e32 v1, v0 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v4bf16_6161: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v4, v[0:1], off -; GFX940-NEXT: global_load_dword v5, v[2:3], off offset:4 -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_bfi_b32 v0, s0, v5, v4 -; GFX940-NEXT: v_mov_b32_e32 v1, v0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v4bf16_6161: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v4, v[0:1], off +; GFX942-NEXT: global_load_dword v5, v[2:3], off offset:4 +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_bfi_b32 v0, s0, v5, v4 +; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v4bf16_6161: ; GFX10: ; %bb.0: @@ -4473,14 +4321,14 @@ define <4 x bfloat> @shuffle_v4bf16_2333(ptr addrspace(1) %arg0, ptr addrspace(1 ; GX900-NEXT: v_perm_b32 v1, v0, v0, s4 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v4bf16_2333: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v0, v[0:1], off offset:4 -; GFX940-NEXT: s_mov_b32 s0, 0x7060302 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v4bf16_2333: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v0, v[0:1], off offset:4 +; GFX942-NEXT: s_mov_b32 s0, 0x7060302 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v4bf16_2333: ; GFX10: ; %bb.0: @@ -4513,14 +4361,14 @@ define <4 x bfloat> @shuffle_v4bf16_6667(ptr addrspace(1) %arg0, ptr addrspace(1 ; GX900-NEXT: v_perm_b32 v1, v0, v0, s4 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v4bf16_6667: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v0, v[0:1], off offset:4 -; GFX940-NEXT: s_mov_b32 s0, 0x7060302 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_perm_b32 v1, v0, v0, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v4bf16_6667: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v0, v[0:1], off offset:4 +; GFX942-NEXT: s_mov_b32 s0, 0x7060302 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_perm_b32 v1, v0, v0, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v4bf16_6667: ; GFX10: ; %bb.0: @@ -4686,16 +4534,16 @@ define <4 x bfloat> @shuffle_v8bf16_13_14_2_3(ptr addrspace(1) %arg0, ptr addrsp ; GX900-NEXT: v_mov_b32_e32 v1, v4 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v8bf16_13_14_2_3: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[6:7], v[2:3], off offset:8 -; GFX940-NEXT: global_load_dword v4, v[0:1], off offset:4 -; GFX940-NEXT: s_waitcnt vmcnt(1) -; GFX940-NEXT: v_alignbit_b32 v0, v7, v6, 16 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v1, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v8bf16_13_14_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx2 v[6:7], v[2:3], off offset:8 +; GFX942-NEXT: global_load_dword v4, v[0:1], off offset:4 +; GFX942-NEXT: s_waitcnt vmcnt(1) +; GFX942-NEXT: v_alignbit_b32 v0, v7, v6, 16 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v1, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v8bf16_13_14_2_3: ; GFX10: ; %bb.0: @@ -4733,14 +4581,14 @@ define <4 x bfloat> @shuffle_v3bf16_0122(ptr addrspace(1) %arg0, ptr addrspace(1 ; GX900-NEXT: v_perm_b32 v1, v1, v1, s4 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v3bf16_0122: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[0:1], v[0:1], off -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_perm_b32 v1, v1, v1, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v3bf16_0122: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_perm_b32 v1, v1, v1, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v3bf16_0122: ; GFX10: ; %bb.0: @@ -4809,20 +4657,20 @@ define <6 x bfloat> @shuffle_v6bf16_452367(ptr addrspace(1) %arg0, ptr addrspace ; GX900-NEXT: v_mov_b32_e32 v2, v7 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v6bf16_452367: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v7, v1 -; GFX940-NEXT: v_mov_b32_e32 v6, v0 -; GFX940-NEXT: v_mov_b32_e32 v5, v3 -; GFX940-NEXT: v_mov_b32_e32 v4, v2 -; GFX940-NEXT: global_load_dwordx3 v[0:2], v[6:7], off -; GFX940-NEXT: global_load_dword v3, v[4:5], off -; GFX940-NEXT: s_waitcnt vmcnt(1) -; GFX940-NEXT: v_mov_b32_e32 v0, v2 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, v3 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v6bf16_452367: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: v_mov_b32_e32 v6, v0 +; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b32_e32 v4, v2 +; GFX942-NEXT: global_load_dwordx3 v[0:2], v[6:7], off +; GFX942-NEXT: global_load_dword v3, v[4:5], off +; GFX942-NEXT: s_waitcnt vmcnt(1) +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_mov_b32_e32 v2, v3 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v6bf16_452367: ; GFX10: ; %bb.0: @@ -4940,90 +4788,90 @@ define amdgpu_kernel void @fma_shuffle_v2bf16(ptr addrspace(1) nocapture readonl ; GX900-NEXT: global_store_dwordx2 v0, v[1:2], s[0:1] ; GX900-NEXT: s_endpgm ; -; GFX940-LABEL: fma_shuffle_v2bf16: -; GFX940: ; %bb.0: ; %entry -; GFX940-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x0 -; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x10 -; GFX940-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v6, 3, v0 -; GFX940-NEXT: s_movk_i32 s2, 0x7fff -; GFX940-NEXT: s_mov_b32 s3, 0x7060302 -; GFX940-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[0:1], v6, s[8:9] -; GFX940-NEXT: global_load_dwordx2 v[2:3], v6, s[0:1] -; GFX940-NEXT: global_load_dwordx2 v[4:5], v6, s[10:11] -; GFX940-NEXT: s_waitcnt vmcnt(2) -; GFX940-NEXT: v_lshlrev_b32_e32 v7, 16, v0 -; GFX940-NEXT: s_waitcnt vmcnt(1) -; GFX940-NEXT: v_and_b32_e32 v8, 0xffff0000, v2 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_and_b32_e32 v9, 0xffff0000, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v11, 16, v1 -; GFX940-NEXT: v_and_b32_e32 v12, 0xffff0000, v3 -; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX940-NEXT: v_fmac_f32_e32 v8, v7, v9 -; GFX940-NEXT: v_fmac_f32_e32 v2, v7, v4 -; GFX940-NEXT: v_fmac_f32_e32 v3, v11, v4 -; GFX940-NEXT: v_bfe_u32 v4, v8, 16, 1 -; GFX940-NEXT: v_fmac_f32_e32 v12, v11, v9 -; GFX940-NEXT: v_or_b32_e32 v7, 0x400000, v8 -; GFX940-NEXT: v_bfe_u32 v9, v2, 16, 1 -; GFX940-NEXT: v_add3_u32 v4, v4, v8, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v8, v8 -; GFX940-NEXT: v_or_b32_e32 v11, 0x400000, v2 -; GFX940-NEXT: v_bfe_u32 v13, v12, 16, 1 -; GFX940-NEXT: v_add3_u32 v9, v9, v2, s2 -; GFX940-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 -; GFX940-NEXT: v_or_b32_e32 v14, 0x400000, v12 -; GFX940-NEXT: v_bfe_u32 v15, v3, 16, 1 -; GFX940-NEXT: v_add3_u32 v13, v13, v12, s2 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v9, v11, vcc -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v12, v12 -; GFX940-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v10, 16, v5 -; GFX940-NEXT: v_or_b32_e32 v16, 0x400000, v3 -; GFX940-NEXT: v_add3_u32 v15, v15, v3, s2 -; GFX940-NEXT: v_cndmask_b32_e32 v7, v13, v14, vcc -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 -; GFX940-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 -; GFX940-NEXT: v_and_b32_e32 v5, 0xffff0000, v5 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v15, v16, vcc -; GFX940-NEXT: v_and_b32_e32 v4, 0xffff0000, v4 -; GFX940-NEXT: v_fmac_f32_e32 v2, v0, v10 -; GFX940-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 -; GFX940-NEXT: v_and_b32_e32 v3, 0xffff0000, v3 -; GFX940-NEXT: v_and_b32_e32 v7, 0xffff0000, v7 -; GFX940-NEXT: v_fmac_f32_e32 v4, v0, v5 -; GFX940-NEXT: v_bfe_u32 v0, v2, 16, 1 -; GFX940-NEXT: v_fmac_f32_e32 v3, v1, v10 -; GFX940-NEXT: v_fmac_f32_e32 v7, v1, v5 -; GFX940-NEXT: v_or_b32_e32 v1, 0x400000, v2 -; GFX940-NEXT: v_bfe_u32 v5, v4, 16, 1 -; GFX940-NEXT: v_add3_u32 v0, v0, v2, s2 -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 -; GFX940-NEXT: v_or_b32_e32 v8, 0x400000, v4 -; GFX940-NEXT: v_bfe_u32 v9, v3, 16, 1 -; GFX940-NEXT: v_add3_u32 v5, v5, v4, s2 -; GFX940-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 -; GFX940-NEXT: v_or_b32_e32 v10, 0x400000, v3 -; GFX940-NEXT: v_bfe_u32 v11, v7, 16, 1 -; GFX940-NEXT: v_add3_u32 v9, v9, v3, s2 -; GFX940-NEXT: v_cndmask_b32_e32 v2, v5, v8, vcc -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 -; GFX940-NEXT: v_or_b32_e32 v12, 0x400000, v7 -; GFX940-NEXT: v_add3_u32 v11, v11, v7, s2 -; GFX940-NEXT: v_cndmask_b32_e32 v1, v9, v10, vcc -; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v7, v7 -; GFX940-NEXT: v_perm_b32 v0, v2, v0, s3 -; GFX940-NEXT: s_nop 0 -; GFX940-NEXT: v_cndmask_b32_e32 v3, v11, v12, vcc -; GFX940-NEXT: v_perm_b32 v1, v3, v1, s3 -; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] sc0 sc1 -; GFX940-NEXT: s_endpgm +; GFX942-LABEL: fma_shuffle_v2bf16: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x0 +; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x10 +; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v6, 3, v0 +; GFX942-NEXT: s_movk_i32 s2, 0x7fff +; GFX942-NEXT: s_mov_b32 s3, 0x7060302 +; GFX942-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx2 v[0:1], v6, s[8:9] +; GFX942-NEXT: global_load_dwordx2 v[2:3], v6, s[0:1] +; GFX942-NEXT: global_load_dwordx2 v[4:5], v6, s[10:11] +; GFX942-NEXT: s_waitcnt vmcnt(2) +; GFX942-NEXT: v_lshlrev_b32_e32 v7, 16, v0 +; GFX942-NEXT: s_waitcnt vmcnt(1) +; GFX942-NEXT: v_and_b32_e32 v8, 0xffff0000, v2 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_and_b32_e32 v9, 0xffff0000, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX942-NEXT: v_lshlrev_b32_e32 v4, 16, v4 +; GFX942-NEXT: v_lshlrev_b32_e32 v11, 16, v1 +; GFX942-NEXT: v_and_b32_e32 v12, 0xffff0000, v3 +; GFX942-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX942-NEXT: v_fmac_f32_e32 v8, v7, v9 +; GFX942-NEXT: v_fmac_f32_e32 v2, v7, v4 +; GFX942-NEXT: v_fmac_f32_e32 v3, v11, v4 +; GFX942-NEXT: v_bfe_u32 v4, v8, 16, 1 +; GFX942-NEXT: v_fmac_f32_e32 v12, v11, v9 +; GFX942-NEXT: v_or_b32_e32 v7, 0x400000, v8 +; GFX942-NEXT: v_bfe_u32 v9, v2, 16, 1 +; GFX942-NEXT: v_add3_u32 v4, v4, v8, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v8, v8 +; GFX942-NEXT: v_or_b32_e32 v11, 0x400000, v2 +; GFX942-NEXT: v_bfe_u32 v13, v12, 16, 1 +; GFX942-NEXT: v_add3_u32 v9, v9, v2, s2 +; GFX942-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX942-NEXT: v_or_b32_e32 v14, 0x400000, v12 +; GFX942-NEXT: v_bfe_u32 v15, v3, 16, 1 +; GFX942-NEXT: v_add3_u32 v13, v13, v12, s2 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v9, v11, vcc +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v12, v12 +; GFX942-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX942-NEXT: v_lshlrev_b32_e32 v10, 16, v5 +; GFX942-NEXT: v_or_b32_e32 v16, 0x400000, v3 +; GFX942-NEXT: v_add3_u32 v15, v15, v3, s2 +; GFX942-NEXT: v_cndmask_b32_e32 v7, v13, v14, vcc +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 +; GFX942-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 +; GFX942-NEXT: v_and_b32_e32 v5, 0xffff0000, v5 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v15, v16, vcc +; GFX942-NEXT: v_and_b32_e32 v4, 0xffff0000, v4 +; GFX942-NEXT: v_fmac_f32_e32 v2, v0, v10 +; GFX942-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 +; GFX942-NEXT: v_and_b32_e32 v3, 0xffff0000, v3 +; GFX942-NEXT: v_and_b32_e32 v7, 0xffff0000, v7 +; GFX942-NEXT: v_fmac_f32_e32 v4, v0, v5 +; GFX942-NEXT: v_bfe_u32 v0, v2, 16, 1 +; GFX942-NEXT: v_fmac_f32_e32 v3, v1, v10 +; GFX942-NEXT: v_fmac_f32_e32 v7, v1, v5 +; GFX942-NEXT: v_or_b32_e32 v1, 0x400000, v2 +; GFX942-NEXT: v_bfe_u32 v5, v4, 16, 1 +; GFX942-NEXT: v_add3_u32 v0, v0, v2, s2 +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v2, v2 +; GFX942-NEXT: v_or_b32_e32 v8, 0x400000, v4 +; GFX942-NEXT: v_bfe_u32 v9, v3, 16, 1 +; GFX942-NEXT: v_add3_u32 v5, v5, v4, s2 +; GFX942-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 +; GFX942-NEXT: v_or_b32_e32 v10, 0x400000, v3 +; GFX942-NEXT: v_bfe_u32 v11, v7, 16, 1 +; GFX942-NEXT: v_add3_u32 v9, v9, v3, s2 +; GFX942-NEXT: v_cndmask_b32_e32 v2, v5, v8, vcc +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 +; GFX942-NEXT: v_or_b32_e32 v12, 0x400000, v7 +; GFX942-NEXT: v_add3_u32 v11, v11, v7, s2 +; GFX942-NEXT: v_cndmask_b32_e32 v1, v9, v10, vcc +; GFX942-NEXT: v_cmp_u_f32_e32 vcc, v7, v7 +; GFX942-NEXT: v_perm_b32 v0, v2, v0, s3 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_cndmask_b32_e32 v3, v11, v12, vcc +; GFX942-NEXT: v_perm_b32 v1, v3, v1, s3 +; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] +; GFX942-NEXT: s_endpgm ; ; GFX10-LABEL: fma_shuffle_v2bf16: ; GFX10: ; %bb.0: ; %entry @@ -5245,16 +5093,16 @@ define <4 x bfloat> @shuffle_v4bf16_0456(ptr addrspace(1) %arg0, ptr addrspace(1 ; GX900-NEXT: v_alignbit_b32 v1, v6, v5, 16 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: shuffle_v4bf16_0456: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[4:5], v[0:1], off -; GFX940-NEXT: global_load_dwordx2 v[6:7], v[2:3], off -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_perm_b32 v0, v6, v4, s0 -; GFX940-NEXT: v_alignbit_b32 v1, v7, v6, 16 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: shuffle_v4bf16_0456: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx2 v[4:5], v[0:1], off +; GFX942-NEXT: global_load_dwordx2 v[6:7], v[2:3], off +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_perm_b32 v0, v6, v4, s0 +; GFX942-NEXT: v_alignbit_b32 v1, v7, v6, 16 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v4bf16_0456: ; GFX10: ; %bb.0: @@ -5294,15 +5142,15 @@ define <2 x bfloat> @low16bits(ptr addrspace(1) %x0, ptr addrspace(1) %x1) { ; GX900-NEXT: v_perm_b32 v0, v5, v4, s4 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: low16bits: -; GFX940: ; %bb.0: ; %entry -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v4, v[0:1], off -; GFX940-NEXT: global_load_dword v5, v[2:3], off -; GFX940-NEXT: s_mov_b32 s0, 0x5040100 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_perm_b32 v0, v5, v4, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: low16bits: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v4, v[0:1], off +; GFX942-NEXT: global_load_dword v5, v[2:3], off +; GFX942-NEXT: s_mov_b32 s0, 0x5040100 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_perm_b32 v0, v5, v4, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: low16bits: ; GFX10: ; %bb.0: ; %entry @@ -5340,15 +5188,15 @@ define <2 x bfloat> @hi16bits_v2bf16(ptr addrspace(1) %x0, ptr addrspace(1) %x1) ; GX900-NEXT: v_perm_b32 v0, v5, v4, s4 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: hi16bits_v2bf16: -; GFX940: ; %bb.0: ; %entry -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v4, v[0:1], off -; GFX940-NEXT: global_load_dword v5, v[2:3], off -; GFX940-NEXT: s_mov_b32 s0, 0x7060302 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_perm_b32 v0, v5, v4, s0 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: hi16bits_v2bf16: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v4, v[0:1], off +; GFX942-NEXT: global_load_dword v5, v[2:3], off +; GFX942-NEXT: s_mov_b32 s0, 0x7060302 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_perm_b32 v0, v5, v4, s0 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: hi16bits_v2bf16: ; GFX10: ; %bb.0: ; %entry @@ -5386,15 +5234,15 @@ define <2 x bfloat> @low16hi16bits_v2bf16(ptr addrspace(1) %x0, ptr addrspace(1) ; GX900-NEXT: v_bfi_b32 v0, s4, v5, v4 ; GX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX940-LABEL: low16hi16bits_v2bf16: -; GFX940: ; %bb.0: ; %entry -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dword v4, v[2:3], off -; GFX940-NEXT: global_load_dword v5, v[0:1], off -; GFX940-NEXT: s_mov_b32 s0, 0xffff -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_bfi_b32 v0, s0, v5, v4 -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX942-LABEL: low16hi16bits_v2bf16: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dword v4, v[2:3], off +; GFX942-NEXT: global_load_dword v5, v[0:1], off +; GFX942-NEXT: s_mov_b32 s0, 0xffff +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: v_bfi_b32 v0, s0, v5, v4 +; GFX942-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: low16hi16bits_v2bf16: ; GFX10: ; %bb.0: ; %entry @@ -5489,25 +5337,15 @@ entry: } define void @shuffle_v8bf16_concat(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1, ptr addrspace(1) %out) { -; GX900-LABEL: shuffle_v8bf16_concat: -; GX900: ; %bb.0: -; GX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GX900-NEXT: global_load_dwordx2 v[6:7], v[0:1], off -; GX900-NEXT: global_load_dwordx2 v[8:9], v[2:3], off -; GX900-NEXT: s_waitcnt vmcnt(0) -; GX900-NEXT: global_store_dwordx4 v[4:5], v[6:9], off -; GX900-NEXT: s_waitcnt vmcnt(0) -; GX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX940-LABEL: shuffle_v8bf16_concat: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx2 v[6:7], v[0:1], off -; GFX940-NEXT: global_load_dwordx2 v[8:9], v[2:3], off -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: global_store_dwordx4 v[4:5], v[6:9], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9-LABEL: shuffle_v8bf16_concat: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx2 v[6:7], v[0:1], off +; GFX9-NEXT: global_load_dwordx2 v[8:9], v[2:3], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dwordx4 v[4:5], v[6:9], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v8bf16_concat: ; GFX10: ; %bb.0: @@ -5534,29 +5372,17 @@ define void @shuffle_v8bf16_concat(ptr addrspace(1) %arg0, ptr addrspace(1) %arg } define void @shuffle_v16bf16_concat(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1, ptr addrspace(1) %out) { -; GX900-LABEL: shuffle_v16bf16_concat: -; GX900: ; %bb.0: -; GX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GX900-NEXT: global_load_dwordx4 v[6:9], v[2:3], off -; GX900-NEXT: global_load_dwordx4 v[10:13], v[0:1], off -; GX900-NEXT: s_waitcnt vmcnt(1) -; GX900-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:16 -; GX900-NEXT: s_waitcnt vmcnt(1) -; GX900-NEXT: global_store_dwordx4 v[4:5], v[10:13], off -; GX900-NEXT: s_waitcnt vmcnt(0) -; GX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX940-LABEL: shuffle_v16bf16_concat: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx4 v[6:9], v[2:3], off -; GFX940-NEXT: global_load_dwordx4 v[10:13], v[0:1], off -; GFX940-NEXT: s_waitcnt vmcnt(1) -; GFX940-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:16 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(1) -; GFX940-NEXT: global_store_dwordx4 v[4:5], v[10:13], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9-LABEL: shuffle_v16bf16_concat: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[6:9], v[2:3], off +; GFX9-NEXT: global_load_dwordx4 v[10:13], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(1) +; GFX9-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:16 +; GFX9-NEXT: s_waitcnt vmcnt(1) +; GFX9-NEXT: global_store_dwordx4 v[4:5], v[10:13], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v16bf16_concat: ; GFX10: ; %bb.0: @@ -5587,41 +5413,23 @@ define void @shuffle_v16bf16_concat(ptr addrspace(1) %arg0, ptr addrspace(1) %ar } define void @shuffle_v32bf16_concat(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1, ptr addrspace(1) %out) { -; GX900-LABEL: shuffle_v32bf16_concat: -; GX900: ; %bb.0: -; GX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GX900-NEXT: global_load_dwordx4 v[6:9], v[2:3], off -; GX900-NEXT: global_load_dwordx4 v[10:13], v[2:3], off offset:16 -; GX900-NEXT: global_load_dwordx4 v[14:17], v[0:1], off -; GX900-NEXT: global_load_dwordx4 v[18:21], v[0:1], off offset:16 -; GX900-NEXT: s_waitcnt vmcnt(3) -; GX900-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:32 -; GX900-NEXT: s_waitcnt vmcnt(3) -; GX900-NEXT: global_store_dwordx4 v[4:5], v[10:13], off offset:48 -; GX900-NEXT: s_waitcnt vmcnt(3) -; GX900-NEXT: global_store_dwordx4 v[4:5], v[14:17], off -; GX900-NEXT: s_waitcnt vmcnt(3) -; GX900-NEXT: global_store_dwordx4 v[4:5], v[18:21], off offset:16 -; GX900-NEXT: s_waitcnt vmcnt(0) -; GX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX940-LABEL: shuffle_v32bf16_concat: -; GFX940: ; %bb.0: -; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: global_load_dwordx4 v[6:9], v[2:3], off -; GFX940-NEXT: global_load_dwordx4 v[10:13], v[2:3], off offset:16 -; GFX940-NEXT: global_load_dwordx4 v[14:17], v[0:1], off -; GFX940-NEXT: global_load_dwordx4 v[18:21], v[0:1], off offset:16 -; GFX940-NEXT: s_waitcnt vmcnt(3) -; GFX940-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:32 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(3) -; GFX940-NEXT: global_store_dwordx4 v[4:5], v[10:13], off offset:48 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(3) -; GFX940-NEXT: global_store_dwordx4 v[4:5], v[14:17], off sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(3) -; GFX940-NEXT: global_store_dwordx4 v[4:5], v[18:21], off offset:16 sc0 sc1 -; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_setpc_b64 s[30:31] +; GFX9-LABEL: shuffle_v32bf16_concat: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[6:9], v[2:3], off +; GFX9-NEXT: global_load_dwordx4 v[10:13], v[2:3], off offset:16 +; GFX9-NEXT: global_load_dwordx4 v[14:17], v[0:1], off +; GFX9-NEXT: global_load_dwordx4 v[18:21], v[0:1], off offset:16 +; GFX9-NEXT: s_waitcnt vmcnt(3) +; GFX9-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:32 +; GFX9-NEXT: s_waitcnt vmcnt(3) +; GFX9-NEXT: global_store_dwordx4 v[4:5], v[10:13], off offset:48 +; GFX9-NEXT: s_waitcnt vmcnt(3) +; GFX9-NEXT: global_store_dwordx4 v[4:5], v[14:17], off +; GFX9-NEXT: s_waitcnt vmcnt(3) +; GFX9-NEXT: global_store_dwordx4 v[4:5], v[18:21], off offset:16 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: shuffle_v32bf16_concat: ; GFX10: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/verifier-sdwa-cvt.mir b/llvm/test/CodeGen/AMDGPU/verifier-sdwa-cvt.mir index a139a2e338984..d23c8d5860964 100644 --- a/llvm/test/CodeGen/AMDGPU/verifier-sdwa-cvt.mir +++ b/llvm/test/CodeGen/AMDGPU/verifier-sdwa-cvt.mir @@ -1,5 +1,5 @@ -# RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx940 -run-pass machineverifier -o /dev/null %s 2>&1 | FileCheck -implicit-check-not="Bad machine code" %s -# RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx940 --passes='machine-function(verify)' -o /dev/null %s 2>&1 | FileCheck -implicit-check-not="Bad machine code" %s +# RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx942 -run-pass machineverifier -o /dev/null %s 2>&1 | FileCheck -implicit-check-not="Bad machine code" %s +# RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx942 --passes='machine-function(verify)' -o /dev/null %s 2>&1 | FileCheck -implicit-check-not="Bad machine code" %s # CHECK: *** Bad machine code: sext, abs and neg are not allowed on this instruction *** # CHECK: $vgpr0 = V_CVT_F32_FP8_sdwa 1, $vgpr0, 0, 0, 4, implicit $mode, implicit $exec diff --git a/llvm/test/MC/AMDGPU/amdhsa-kd-kernarg-preload.s b/llvm/test/MC/AMDGPU/amdhsa-kd-kernarg-preload.s index f4ae23fc0aa7b..66a58951655fa 100644 --- a/llvm/test/MC/AMDGPU/amdhsa-kd-kernarg-preload.s +++ b/llvm/test/MC/AMDGPU/amdhsa-kd-kernarg-preload.s @@ -1,6 +1,6 @@ -// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=obj < %s -o - | llvm-objdump -s -j .rodata - | FileCheck --check-prefix=OBJDUMP %s +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx942 -filetype=obj < %s -o - | llvm-objdump -s -j .rodata - | FileCheck --check-prefix=OBJDUMP %s -.amdgcn_target "amdgcn-amd-amdhsa--gfx940" +.amdgcn_target "amdgcn-amd-amdhsa--gfx942" .rodata diff --git a/llvm/test/MC/AMDGPU/extrasgprs_mcexpr.s b/llvm/test/MC/AMDGPU/extrasgprs_mcexpr.s index e88b23bb34d4f..c994037ce59b3 100644 --- a/llvm/test/MC/AMDGPU/extrasgprs_mcexpr.s +++ b/llvm/test/MC/AMDGPU/extrasgprs_mcexpr.s @@ -1,9 +1,9 @@ // RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=bonaire < %s | FileCheck --check-prefix=GFX7 %s // RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck --check-prefix=GFX90A %s -// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck --check-prefix=GFX940 %s +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck --check-prefix=GFX942 %s // RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefix=GFX10 %s -// gfx940 has architected flat scratch enabled. +// gfx942 has architected flat scratch enabled. // GFX7: .set extrasgpr_none, 0 // GFX7: .set extrasgpr_vcc, 2 @@ -15,10 +15,10 @@ // GFX90A: .set extrasgpr_flatscr, 6 // GFX90A: .set extrasgpr_xnack, 4 -// GFX940: .set extrasgpr_none, 6 -// GFX940: .set extrasgpr_vcc, 6 -// GFX940: .set extrasgpr_flatscr, 6 -// GFX940: .set extrasgpr_xnack, 6 +// GFX942: .set extrasgpr_none, 6 +// GFX942: .set extrasgpr_vcc, 6 +// GFX942: .set extrasgpr_flatscr, 6 +// GFX942: .set extrasgpr_xnack, 6 // GFX10: .set extrasgpr_none, 0 // GFX10: .set extrasgpr_vcc, 2 diff --git a/llvm/test/MC/AMDGPU/flat-scratch-gfx940.s b/llvm/test/MC/AMDGPU/flat-scratch-gfx942.s similarity index 57% rename from llvm/test/MC/AMDGPU/flat-scratch-gfx940.s rename to llvm/test/MC/AMDGPU/flat-scratch-gfx942.s index d3ca4281dca41..66d2bb6fe6fcc 100644 --- a/llvm/test/MC/AMDGPU/flat-scratch-gfx940.s +++ b/llvm/test/MC/AMDGPU/flat-scratch-gfx942.s @@ -1,1058 +1,1058 @@ -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx940 -show-encoding %s | FileCheck -check-prefix=GFX940 %s -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck -check-prefix=GFX940 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx942 -show-encoding %s | FileCheck -check-prefix=GFX942 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck -check-prefix=GFX942 %s scratch_load_dword a2, v4, s6 -// GFX940: scratch_load_dword a2, v4, s6 ; encoding: [0x00,0x60,0x50,0xdc,0x04,0x00,0x86,0x02] +// GFX942: scratch_load_dword a2, v4, s6 ; encoding: [0x00,0x60,0x50,0xdc,0x04,0x00,0x86,0x02] scratch_load_dword a2, v4, s6 offset:16 -// GFX940: scratch_load_dword a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x50,0xdc,0x04,0x00,0x86,0x02] +// GFX942: scratch_load_dword a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x50,0xdc,0x04,0x00,0x86,0x02] scratch_load_dword a2, v4, off -// GFX940: scratch_load_dword a2, v4, off ; encoding: [0x00,0x60,0x50,0xdc,0x04,0x00,0xff,0x02] +// GFX942: scratch_load_dword a2, v4, off ; encoding: [0x00,0x60,0x50,0xdc,0x04,0x00,0xff,0x02] scratch_load_dword a2, v4, off offset:16 -// GFX940: scratch_load_dword a2, v4, off offset:16 ; encoding: [0x10,0x60,0x50,0xdc,0x04,0x00,0xff,0x02] +// GFX942: scratch_load_dword a2, v4, off offset:16 ; encoding: [0x10,0x60,0x50,0xdc,0x04,0x00,0xff,0x02] scratch_load_dword a2, off, s6 -// GFX940: scratch_load_dword a2, off, s6 ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x86,0x02] +// GFX942: scratch_load_dword a2, off, s6 ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x86,0x02] scratch_load_dword a2, off, s6 offset:16 -// GFX940: scratch_load_dword a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x50,0xdc,0x00,0x00,0x86,0x02] +// GFX942: scratch_load_dword a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x50,0xdc,0x00,0x00,0x86,0x02] scratch_load_dword a2, off, off -// GFX940: scratch_load_dword a2, off, off ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0xff,0x02] +// GFX942: scratch_load_dword a2, off, off ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0xff,0x02] scratch_load_dword a2, off, off offset:16 -// GFX940: scratch_load_dword a2, off, off offset:16 ; encoding: [0x10,0x40,0x50,0xdc,0x00,0x00,0xff,0x02] +// GFX942: scratch_load_dword a2, off, off offset:16 ; encoding: [0x10,0x40,0x50,0xdc,0x00,0x00,0xff,0x02] scratch_load_dword v2, v4, s6 -// GFX940: scratch_load_dword v2, v4, s6 ; encoding: [0x00,0x60,0x50,0xdc,0x04,0x00,0x06,0x02] +// GFX942: scratch_load_dword v2, v4, s6 ; encoding: [0x00,0x60,0x50,0xdc,0x04,0x00,0x06,0x02] scratch_load_dword v2, v4, s6 offset:16 -// GFX940: scratch_load_dword v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x50,0xdc,0x04,0x00,0x06,0x02] +// GFX942: scratch_load_dword v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x50,0xdc,0x04,0x00,0x06,0x02] scratch_load_dword v2, v4, off -// GFX940: scratch_load_dword v2, v4, off ; encoding: [0x00,0x60,0x50,0xdc,0x04,0x00,0x7f,0x02] +// GFX942: scratch_load_dword v2, v4, off ; encoding: [0x00,0x60,0x50,0xdc,0x04,0x00,0x7f,0x02] scratch_load_dword v2, v4, off offset:16 -// GFX940: scratch_load_dword v2, v4, off offset:16 ; encoding: [0x10,0x60,0x50,0xdc,0x04,0x00,0x7f,0x02] +// GFX942: scratch_load_dword v2, v4, off offset:16 ; encoding: [0x10,0x60,0x50,0xdc,0x04,0x00,0x7f,0x02] scratch_load_dword v2, off, s6 -// GFX940: scratch_load_dword v2, off, s6 ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x06,0x02] +// GFX942: scratch_load_dword v2, off, s6 ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x06,0x02] scratch_load_dword v2, off, s6 offset:16 -// GFX940: scratch_load_dword v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x50,0xdc,0x00,0x00,0x06,0x02] +// GFX942: scratch_load_dword v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x50,0xdc,0x00,0x00,0x06,0x02] scratch_load_dword v2, off, off -// GFX940: scratch_load_dword v2, off, off ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x7f,0x02] +// GFX942: scratch_load_dword v2, off, off ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x7f,0x02] scratch_load_dword v2, off, off offset:16 -// GFX940: scratch_load_dword v2, off, off offset:16 ; encoding: [0x10,0x40,0x50,0xdc,0x00,0x00,0x7f,0x02] +// GFX942: scratch_load_dword v2, off, off offset:16 ; encoding: [0x10,0x40,0x50,0xdc,0x00,0x00,0x7f,0x02] scratch_load_dwordx2 a[2:3], v4, s6 -// GFX940: scratch_load_dwordx2 a[2:3], v4, s6 ; encoding: [0x00,0x60,0x54,0xdc,0x04,0x00,0x86,0x02] +// GFX942: scratch_load_dwordx2 a[2:3], v4, s6 ; encoding: [0x00,0x60,0x54,0xdc,0x04,0x00,0x86,0x02] scratch_load_dwordx2 a[2:3], v4, s6 offset:16 -// GFX940: scratch_load_dwordx2 a[2:3], v4, s6 offset:16 ; encoding: [0x10,0x60,0x54,0xdc,0x04,0x00,0x86,0x02] +// GFX942: scratch_load_dwordx2 a[2:3], v4, s6 offset:16 ; encoding: [0x10,0x60,0x54,0xdc,0x04,0x00,0x86,0x02] scratch_load_dwordx2 a[2:3], v4, off -// GFX940: scratch_load_dwordx2 a[2:3], v4, off ; encoding: [0x00,0x60,0x54,0xdc,0x04,0x00,0xff,0x02] +// GFX942: scratch_load_dwordx2 a[2:3], v4, off ; encoding: [0x00,0x60,0x54,0xdc,0x04,0x00,0xff,0x02] scratch_load_dwordx2 a[2:3], v4, off offset:16 -// GFX940: scratch_load_dwordx2 a[2:3], v4, off offset:16 ; encoding: [0x10,0x60,0x54,0xdc,0x04,0x00,0xff,0x02] +// GFX942: scratch_load_dwordx2 a[2:3], v4, off offset:16 ; encoding: [0x10,0x60,0x54,0xdc,0x04,0x00,0xff,0x02] scratch_load_dwordx2 a[2:3], off, s6 -// GFX940: scratch_load_dwordx2 a[2:3], off, s6 ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0x86,0x02] +// GFX942: scratch_load_dwordx2 a[2:3], off, s6 ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0x86,0x02] scratch_load_dwordx2 a[2:3], off, s6 offset:16 -// GFX940: scratch_load_dwordx2 a[2:3], off, s6 offset:16 ; encoding: [0x10,0x40,0x54,0xdc,0x00,0x00,0x86,0x02] +// GFX942: scratch_load_dwordx2 a[2:3], off, s6 offset:16 ; encoding: [0x10,0x40,0x54,0xdc,0x00,0x00,0x86,0x02] scratch_load_dwordx2 a[2:3], off, off -// GFX940: scratch_load_dwordx2 a[2:3], off, off ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0xff,0x02] +// GFX942: scratch_load_dwordx2 a[2:3], off, off ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0xff,0x02] scratch_load_dwordx2 a[2:3], off, off offset:16 -// GFX940: scratch_load_dwordx2 a[2:3], off, off offset:16 ; encoding: [0x10,0x40,0x54,0xdc,0x00,0x00,0xff,0x02] +// GFX942: scratch_load_dwordx2 a[2:3], off, off offset:16 ; encoding: [0x10,0x40,0x54,0xdc,0x00,0x00,0xff,0x02] scratch_load_dwordx2 v[2:3], v4, s6 -// GFX940: scratch_load_dwordx2 v[2:3], v4, s6 ; encoding: [0x00,0x60,0x54,0xdc,0x04,0x00,0x06,0x02] +// GFX942: scratch_load_dwordx2 v[2:3], v4, s6 ; encoding: [0x00,0x60,0x54,0xdc,0x04,0x00,0x06,0x02] scratch_load_dwordx2 v[2:3], v4, s6 offset:16 -// GFX940: scratch_load_dwordx2 v[2:3], v4, s6 offset:16 ; encoding: [0x10,0x60,0x54,0xdc,0x04,0x00,0x06,0x02] +// GFX942: scratch_load_dwordx2 v[2:3], v4, s6 offset:16 ; encoding: [0x10,0x60,0x54,0xdc,0x04,0x00,0x06,0x02] scratch_load_dwordx2 v[2:3], v4, off -// GFX940: scratch_load_dwordx2 v[2:3], v4, off ; encoding: [0x00,0x60,0x54,0xdc,0x04,0x00,0x7f,0x02] +// GFX942: scratch_load_dwordx2 v[2:3], v4, off ; encoding: [0x00,0x60,0x54,0xdc,0x04,0x00,0x7f,0x02] scratch_load_dwordx2 v[2:3], v4, off offset:16 -// GFX940: scratch_load_dwordx2 v[2:3], v4, off offset:16 ; encoding: [0x10,0x60,0x54,0xdc,0x04,0x00,0x7f,0x02] +// GFX942: scratch_load_dwordx2 v[2:3], v4, off offset:16 ; encoding: [0x10,0x60,0x54,0xdc,0x04,0x00,0x7f,0x02] scratch_load_dwordx2 v[2:3], off, s6 -// GFX940: scratch_load_dwordx2 v[2:3], off, s6 ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0x06,0x02] +// GFX942: scratch_load_dwordx2 v[2:3], off, s6 ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0x06,0x02] scratch_load_dwordx2 v[2:3], off, s6 offset:16 -// GFX940: scratch_load_dwordx2 v[2:3], off, s6 offset:16 ; encoding: [0x10,0x40,0x54,0xdc,0x00,0x00,0x06,0x02] +// GFX942: scratch_load_dwordx2 v[2:3], off, s6 offset:16 ; encoding: [0x10,0x40,0x54,0xdc,0x00,0x00,0x06,0x02] scratch_load_dwordx2 v[2:3], off, off -// GFX940: scratch_load_dwordx2 v[2:3], off, off ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0x7f,0x02] +// GFX942: scratch_load_dwordx2 v[2:3], off, off ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0x7f,0x02] scratch_load_dwordx2 v[2:3], off, off offset:16 -// GFX940: scratch_load_dwordx2 v[2:3], off, off offset:16 ; encoding: [0x10,0x40,0x54,0xdc,0x00,0x00,0x7f,0x02] +// GFX942: scratch_load_dwordx2 v[2:3], off, off offset:16 ; encoding: [0x10,0x40,0x54,0xdc,0x00,0x00,0x7f,0x02] scratch_load_dwordx3 a[2:4], v4, s6 -// GFX940: scratch_load_dwordx3 a[2:4], v4, s6 ; encoding: [0x00,0x60,0x58,0xdc,0x04,0x00,0x86,0x02] +// GFX942: scratch_load_dwordx3 a[2:4], v4, s6 ; encoding: [0x00,0x60,0x58,0xdc,0x04,0x00,0x86,0x02] scratch_load_dwordx3 a[2:4], v4, s6 offset:16 -// GFX940: scratch_load_dwordx3 a[2:4], v4, s6 offset:16 ; encoding: [0x10,0x60,0x58,0xdc,0x04,0x00,0x86,0x02] +// GFX942: scratch_load_dwordx3 a[2:4], v4, s6 offset:16 ; encoding: [0x10,0x60,0x58,0xdc,0x04,0x00,0x86,0x02] scratch_load_dwordx3 a[2:4], v4, off -// GFX940: scratch_load_dwordx3 a[2:4], v4, off ; encoding: [0x00,0x60,0x58,0xdc,0x04,0x00,0xff,0x02] +// GFX942: scratch_load_dwordx3 a[2:4], v4, off ; encoding: [0x00,0x60,0x58,0xdc,0x04,0x00,0xff,0x02] scratch_load_dwordx3 a[2:4], v4, off offset:16 -// GFX940: scratch_load_dwordx3 a[2:4], v4, off offset:16 ; encoding: [0x10,0x60,0x58,0xdc,0x04,0x00,0xff,0x02] +// GFX942: scratch_load_dwordx3 a[2:4], v4, off offset:16 ; encoding: [0x10,0x60,0x58,0xdc,0x04,0x00,0xff,0x02] scratch_load_dwordx3 a[2:4], off, s6 -// GFX940: scratch_load_dwordx3 a[2:4], off, s6 ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0x86,0x02] +// GFX942: scratch_load_dwordx3 a[2:4], off, s6 ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0x86,0x02] scratch_load_dwordx3 a[2:4], off, s6 offset:16 -// GFX940: scratch_load_dwordx3 a[2:4], off, s6 offset:16 ; encoding: [0x10,0x40,0x58,0xdc,0x00,0x00,0x86,0x02] +// GFX942: scratch_load_dwordx3 a[2:4], off, s6 offset:16 ; encoding: [0x10,0x40,0x58,0xdc,0x00,0x00,0x86,0x02] scratch_load_dwordx3 a[2:4], off, off -// GFX940: scratch_load_dwordx3 a[2:4], off, off ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0xff,0x02] +// GFX942: scratch_load_dwordx3 a[2:4], off, off ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0xff,0x02] scratch_load_dwordx3 a[2:4], off, off offset:16 -// GFX940: scratch_load_dwordx3 a[2:4], off, off offset:16 ; encoding: [0x10,0x40,0x58,0xdc,0x00,0x00,0xff,0x02] +// GFX942: scratch_load_dwordx3 a[2:4], off, off offset:16 ; encoding: [0x10,0x40,0x58,0xdc,0x00,0x00,0xff,0x02] scratch_load_dwordx3 v[2:4], v4, s6 -// GFX940: scratch_load_dwordx3 v[2:4], v4, s6 ; encoding: [0x00,0x60,0x58,0xdc,0x04,0x00,0x06,0x02] +// GFX942: scratch_load_dwordx3 v[2:4], v4, s6 ; encoding: [0x00,0x60,0x58,0xdc,0x04,0x00,0x06,0x02] scratch_load_dwordx3 v[2:4], v4, s6 offset:16 -// GFX940: scratch_load_dwordx3 v[2:4], v4, s6 offset:16 ; encoding: [0x10,0x60,0x58,0xdc,0x04,0x00,0x06,0x02] +// GFX942: scratch_load_dwordx3 v[2:4], v4, s6 offset:16 ; encoding: [0x10,0x60,0x58,0xdc,0x04,0x00,0x06,0x02] scratch_load_dwordx3 v[2:4], v4, off -// GFX940: scratch_load_dwordx3 v[2:4], v4, off ; encoding: [0x00,0x60,0x58,0xdc,0x04,0x00,0x7f,0x02] +// GFX942: scratch_load_dwordx3 v[2:4], v4, off ; encoding: [0x00,0x60,0x58,0xdc,0x04,0x00,0x7f,0x02] scratch_load_dwordx3 v[2:4], v4, off offset:16 -// GFX940: scratch_load_dwordx3 v[2:4], v4, off offset:16 ; encoding: [0x10,0x60,0x58,0xdc,0x04,0x00,0x7f,0x02] +// GFX942: scratch_load_dwordx3 v[2:4], v4, off offset:16 ; encoding: [0x10,0x60,0x58,0xdc,0x04,0x00,0x7f,0x02] scratch_load_dwordx3 v[2:4], off, s6 -// GFX940: scratch_load_dwordx3 v[2:4], off, s6 ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0x06,0x02] +// GFX942: scratch_load_dwordx3 v[2:4], off, s6 ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0x06,0x02] scratch_load_dwordx3 v[2:4], off, s6 offset:16 -// GFX940: scratch_load_dwordx3 v[2:4], off, s6 offset:16 ; encoding: [0x10,0x40,0x58,0xdc,0x00,0x00,0x06,0x02] +// GFX942: scratch_load_dwordx3 v[2:4], off, s6 offset:16 ; encoding: [0x10,0x40,0x58,0xdc,0x00,0x00,0x06,0x02] scratch_load_dwordx3 v[2:4], off, off -// GFX940: scratch_load_dwordx3 v[2:4], off, off ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0x7f,0x02] +// GFX942: scratch_load_dwordx3 v[2:4], off, off ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0x7f,0x02] scratch_load_dwordx3 v[2:4], off, off offset:16 -// GFX940: scratch_load_dwordx3 v[2:4], off, off offset:16 ; encoding: [0x10,0x40,0x58,0xdc,0x00,0x00,0x7f,0x02] +// GFX942: scratch_load_dwordx3 v[2:4], off, off offset:16 ; encoding: [0x10,0x40,0x58,0xdc,0x00,0x00,0x7f,0x02] scratch_load_dwordx4 a[2:5], v4, s6 -// GFX940: scratch_load_dwordx4 a[2:5], v4, s6 ; encoding: [0x00,0x60,0x5c,0xdc,0x04,0x00,0x86,0x02] +// GFX942: scratch_load_dwordx4 a[2:5], v4, s6 ; encoding: [0x00,0x60,0x5c,0xdc,0x04,0x00,0x86,0x02] scratch_load_dwordx4 a[2:5], v4, s6 offset:16 -// GFX940: scratch_load_dwordx4 a[2:5], v4, s6 offset:16 ; encoding: [0x10,0x60,0x5c,0xdc,0x04,0x00,0x86,0x02] +// GFX942: scratch_load_dwordx4 a[2:5], v4, s6 offset:16 ; encoding: [0x10,0x60,0x5c,0xdc,0x04,0x00,0x86,0x02] scratch_load_dwordx4 a[2:5], v4, off -// GFX940: scratch_load_dwordx4 a[2:5], v4, off ; encoding: [0x00,0x60,0x5c,0xdc,0x04,0x00,0xff,0x02] +// GFX942: scratch_load_dwordx4 a[2:5], v4, off ; encoding: [0x00,0x60,0x5c,0xdc,0x04,0x00,0xff,0x02] scratch_load_dwordx4 a[2:5], v4, off offset:16 -// GFX940: scratch_load_dwordx4 a[2:5], v4, off offset:16 ; encoding: [0x10,0x60,0x5c,0xdc,0x04,0x00,0xff,0x02] +// GFX942: scratch_load_dwordx4 a[2:5], v4, off offset:16 ; encoding: [0x10,0x60,0x5c,0xdc,0x04,0x00,0xff,0x02] scratch_load_dwordx4 a[2:5], off, s6 -// GFX940: scratch_load_dwordx4 a[2:5], off, s6 ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x86,0x02] +// GFX942: scratch_load_dwordx4 a[2:5], off, s6 ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x86,0x02] scratch_load_dwordx4 a[2:5], off, s6 offset:16 -// GFX940: scratch_load_dwordx4 a[2:5], off, s6 offset:16 ; encoding: [0x10,0x40,0x5c,0xdc,0x00,0x00,0x86,0x02] +// GFX942: scratch_load_dwordx4 a[2:5], off, s6 offset:16 ; encoding: [0x10,0x40,0x5c,0xdc,0x00,0x00,0x86,0x02] scratch_load_dwordx4 a[2:5], off, off -// GFX940: scratch_load_dwordx4 a[2:5], off, off ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0xff,0x02] +// GFX942: scratch_load_dwordx4 a[2:5], off, off ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0xff,0x02] scratch_load_dwordx4 a[2:5], off, off offset:16 -// GFX940: scratch_load_dwordx4 a[2:5], off, off offset:16 ; encoding: [0x10,0x40,0x5c,0xdc,0x00,0x00,0xff,0x02] +// GFX942: scratch_load_dwordx4 a[2:5], off, off offset:16 ; encoding: [0x10,0x40,0x5c,0xdc,0x00,0x00,0xff,0x02] scratch_load_dwordx4 v[2:5], v4, s6 -// GFX940: scratch_load_dwordx4 v[2:5], v4, s6 ; encoding: [0x00,0x60,0x5c,0xdc,0x04,0x00,0x06,0x02] +// GFX942: scratch_load_dwordx4 v[2:5], v4, s6 ; encoding: [0x00,0x60,0x5c,0xdc,0x04,0x00,0x06,0x02] scratch_load_dwordx4 v[2:5], v4, s6 offset:16 -// GFX940: scratch_load_dwordx4 v[2:5], v4, s6 offset:16 ; encoding: [0x10,0x60,0x5c,0xdc,0x04,0x00,0x06,0x02] +// GFX942: scratch_load_dwordx4 v[2:5], v4, s6 offset:16 ; encoding: [0x10,0x60,0x5c,0xdc,0x04,0x00,0x06,0x02] scratch_load_dwordx4 v[2:5], v4, off -// GFX940: scratch_load_dwordx4 v[2:5], v4, off ; encoding: [0x00,0x60,0x5c,0xdc,0x04,0x00,0x7f,0x02] +// GFX942: scratch_load_dwordx4 v[2:5], v4, off ; encoding: [0x00,0x60,0x5c,0xdc,0x04,0x00,0x7f,0x02] scratch_load_dwordx4 v[2:5], v4, off offset:16 -// GFX940: scratch_load_dwordx4 v[2:5], v4, off offset:16 ; encoding: [0x10,0x60,0x5c,0xdc,0x04,0x00,0x7f,0x02] +// GFX942: scratch_load_dwordx4 v[2:5], v4, off offset:16 ; encoding: [0x10,0x60,0x5c,0xdc,0x04,0x00,0x7f,0x02] scratch_load_dwordx4 v[2:5], off, s6 -// GFX940: scratch_load_dwordx4 v[2:5], off, s6 ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x06,0x02] +// GFX942: scratch_load_dwordx4 v[2:5], off, s6 ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x06,0x02] scratch_load_dwordx4 v[2:5], off, s6 offset:16 -// GFX940: scratch_load_dwordx4 v[2:5], off, s6 offset:16 ; encoding: [0x10,0x40,0x5c,0xdc,0x00,0x00,0x06,0x02] +// GFX942: scratch_load_dwordx4 v[2:5], off, s6 offset:16 ; encoding: [0x10,0x40,0x5c,0xdc,0x00,0x00,0x06,0x02] scratch_load_dwordx4 v[2:5], off, off -// GFX940: scratch_load_dwordx4 v[2:5], off, off ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x7f,0x02] +// GFX942: scratch_load_dwordx4 v[2:5], off, off ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x7f,0x02] scratch_load_dwordx4 v[2:5], off, off offset:16 -// GFX940: scratch_load_dwordx4 v[2:5], off, off offset:16 ; encoding: [0x10,0x40,0x5c,0xdc,0x00,0x00,0x7f,0x02] +// GFX942: scratch_load_dwordx4 v[2:5], off, off offset:16 ; encoding: [0x10,0x40,0x5c,0xdc,0x00,0x00,0x7f,0x02] scratch_load_sbyte a2, v4, s6 -// GFX940: scratch_load_sbyte a2, v4, s6 ; encoding: [0x00,0x60,0x44,0xdc,0x04,0x00,0x86,0x02] +// GFX942: scratch_load_sbyte a2, v4, s6 ; encoding: [0x00,0x60,0x44,0xdc,0x04,0x00,0x86,0x02] scratch_load_sbyte a2, v4, s6 offset:16 -// GFX940: scratch_load_sbyte a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x44,0xdc,0x04,0x00,0x86,0x02] +// GFX942: scratch_load_sbyte a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x44,0xdc,0x04,0x00,0x86,0x02] scratch_load_sbyte a2, v4, off -// GFX940: scratch_load_sbyte a2, v4, off ; encoding: [0x00,0x60,0x44,0xdc,0x04,0x00,0xff,0x02] +// GFX942: scratch_load_sbyte a2, v4, off ; encoding: [0x00,0x60,0x44,0xdc,0x04,0x00,0xff,0x02] scratch_load_sbyte a2, v4, off offset:16 -// GFX940: scratch_load_sbyte a2, v4, off offset:16 ; encoding: [0x10,0x60,0x44,0xdc,0x04,0x00,0xff,0x02] +// GFX942: scratch_load_sbyte a2, v4, off offset:16 ; encoding: [0x10,0x60,0x44,0xdc,0x04,0x00,0xff,0x02] scratch_load_sbyte a2, off, s6 -// GFX940: scratch_load_sbyte a2, off, s6 ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0x86,0x02] +// GFX942: scratch_load_sbyte a2, off, s6 ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0x86,0x02] scratch_load_sbyte a2, off, s6 offset:16 -// GFX940: scratch_load_sbyte a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x44,0xdc,0x00,0x00,0x86,0x02] +// GFX942: scratch_load_sbyte a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x44,0xdc,0x00,0x00,0x86,0x02] scratch_load_sbyte a2, off, off -// GFX940: scratch_load_sbyte a2, off, off ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0xff,0x02] +// GFX942: scratch_load_sbyte a2, off, off ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0xff,0x02] scratch_load_sbyte a2, off, off offset:16 -// GFX940: scratch_load_sbyte a2, off, off offset:16 ; encoding: [0x10,0x40,0x44,0xdc,0x00,0x00,0xff,0x02] +// GFX942: scratch_load_sbyte a2, off, off offset:16 ; encoding: [0x10,0x40,0x44,0xdc,0x00,0x00,0xff,0x02] scratch_load_sbyte v2, v4, s6 -// GFX940: scratch_load_sbyte v2, v4, s6 ; encoding: [0x00,0x60,0x44,0xdc,0x04,0x00,0x06,0x02] +// GFX942: scratch_load_sbyte v2, v4, s6 ; encoding: [0x00,0x60,0x44,0xdc,0x04,0x00,0x06,0x02] scratch_load_sbyte v2, v4, s6 offset:16 -// GFX940: scratch_load_sbyte v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x44,0xdc,0x04,0x00,0x06,0x02] +// GFX942: scratch_load_sbyte v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x44,0xdc,0x04,0x00,0x06,0x02] scratch_load_sbyte v2, v4, off -// GFX940: scratch_load_sbyte v2, v4, off ; encoding: [0x00,0x60,0x44,0xdc,0x04,0x00,0x7f,0x02] +// GFX942: scratch_load_sbyte v2, v4, off ; encoding: [0x00,0x60,0x44,0xdc,0x04,0x00,0x7f,0x02] scratch_load_sbyte v2, v4, off offset:16 -// GFX940: scratch_load_sbyte v2, v4, off offset:16 ; encoding: [0x10,0x60,0x44,0xdc,0x04,0x00,0x7f,0x02] +// GFX942: scratch_load_sbyte v2, v4, off offset:16 ; encoding: [0x10,0x60,0x44,0xdc,0x04,0x00,0x7f,0x02] scratch_load_sbyte v2, off, s6 -// GFX940: scratch_load_sbyte v2, off, s6 ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0x06,0x02] +// GFX942: scratch_load_sbyte v2, off, s6 ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0x06,0x02] scratch_load_sbyte v2, off, s6 offset:16 -// GFX940: scratch_load_sbyte v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x44,0xdc,0x00,0x00,0x06,0x02] +// GFX942: scratch_load_sbyte v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x44,0xdc,0x00,0x00,0x06,0x02] scratch_load_sbyte v2, off, off -// GFX940: scratch_load_sbyte v2, off, off ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0x7f,0x02] +// GFX942: scratch_load_sbyte v2, off, off ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0x7f,0x02] scratch_load_sbyte v2, off, off offset:16 -// GFX940: scratch_load_sbyte v2, off, off offset:16 ; encoding: [0x10,0x40,0x44,0xdc,0x00,0x00,0x7f,0x02] +// GFX942: scratch_load_sbyte v2, off, off offset:16 ; encoding: [0x10,0x40,0x44,0xdc,0x00,0x00,0x7f,0x02] scratch_load_sbyte_d16 a2, v4, s6 -// GFX940: scratch_load_sbyte_d16 a2, v4, s6 ; encoding: [0x00,0x60,0x88,0xdc,0x04,0x00,0x86,0x02] +// GFX942: scratch_load_sbyte_d16 a2, v4, s6 ; encoding: [0x00,0x60,0x88,0xdc,0x04,0x00,0x86,0x02] scratch_load_sbyte_d16 a2, v4, s6 offset:16 -// GFX940: scratch_load_sbyte_d16 a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x88,0xdc,0x04,0x00,0x86,0x02] +// GFX942: scratch_load_sbyte_d16 a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x88,0xdc,0x04,0x00,0x86,0x02] scratch_load_sbyte_d16 a2, v4, off -// GFX940: scratch_load_sbyte_d16 a2, v4, off ; encoding: [0x00,0x60,0x88,0xdc,0x04,0x00,0xff,0x02] +// GFX942: scratch_load_sbyte_d16 a2, v4, off ; encoding: [0x00,0x60,0x88,0xdc,0x04,0x00,0xff,0x02] scratch_load_sbyte_d16 a2, v4, off offset:16 -// GFX940: scratch_load_sbyte_d16 a2, v4, off offset:16 ; encoding: [0x10,0x60,0x88,0xdc,0x04,0x00,0xff,0x02] +// GFX942: scratch_load_sbyte_d16 a2, v4, off offset:16 ; encoding: [0x10,0x60,0x88,0xdc,0x04,0x00,0xff,0x02] scratch_load_sbyte_d16 a2, off, s6 -// GFX940: scratch_load_sbyte_d16 a2, off, s6 ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0x86,0x02] +// GFX942: scratch_load_sbyte_d16 a2, off, s6 ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0x86,0x02] scratch_load_sbyte_d16 a2, off, s6 offset:16 -// GFX940: scratch_load_sbyte_d16 a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x88,0xdc,0x00,0x00,0x86,0x02] +// GFX942: scratch_load_sbyte_d16 a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x88,0xdc,0x00,0x00,0x86,0x02] scratch_load_sbyte_d16 a2, off, off -// GFX940: scratch_load_sbyte_d16 a2, off, off ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0xff,0x02] +// GFX942: scratch_load_sbyte_d16 a2, off, off ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0xff,0x02] scratch_load_sbyte_d16 a2, off, off offset:16 -// GFX940: scratch_load_sbyte_d16 a2, off, off offset:16 ; encoding: [0x10,0x40,0x88,0xdc,0x00,0x00,0xff,0x02] +// GFX942: scratch_load_sbyte_d16 a2, off, off offset:16 ; encoding: [0x10,0x40,0x88,0xdc,0x00,0x00,0xff,0x02] scratch_load_sbyte_d16 v2, v4, s6 -// GFX940: scratch_load_sbyte_d16 v2, v4, s6 ; encoding: [0x00,0x60,0x88,0xdc,0x04,0x00,0x06,0x02] +// GFX942: scratch_load_sbyte_d16 v2, v4, s6 ; encoding: [0x00,0x60,0x88,0xdc,0x04,0x00,0x06,0x02] scratch_load_sbyte_d16 v2, v4, s6 offset:16 -// GFX940: scratch_load_sbyte_d16 v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x88,0xdc,0x04,0x00,0x06,0x02] +// GFX942: scratch_load_sbyte_d16 v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x88,0xdc,0x04,0x00,0x06,0x02] scratch_load_sbyte_d16 v2, v4, off -// GFX940: scratch_load_sbyte_d16 v2, v4, off ; encoding: [0x00,0x60,0x88,0xdc,0x04,0x00,0x7f,0x02] +// GFX942: scratch_load_sbyte_d16 v2, v4, off ; encoding: [0x00,0x60,0x88,0xdc,0x04,0x00,0x7f,0x02] scratch_load_sbyte_d16 v2, v4, off offset:16 -// GFX940: scratch_load_sbyte_d16 v2, v4, off offset:16 ; encoding: [0x10,0x60,0x88,0xdc,0x04,0x00,0x7f,0x02] +// GFX942: scratch_load_sbyte_d16 v2, v4, off offset:16 ; encoding: [0x10,0x60,0x88,0xdc,0x04,0x00,0x7f,0x02] scratch_load_sbyte_d16 v2, off, s6 -// GFX940: scratch_load_sbyte_d16 v2, off, s6 ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0x06,0x02] +// GFX942: scratch_load_sbyte_d16 v2, off, s6 ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0x06,0x02] scratch_load_sbyte_d16 v2, off, s6 offset:16 -// GFX940: scratch_load_sbyte_d16 v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x88,0xdc,0x00,0x00,0x06,0x02] +// GFX942: scratch_load_sbyte_d16 v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x88,0xdc,0x00,0x00,0x06,0x02] scratch_load_sbyte_d16 v2, off, off -// GFX940: scratch_load_sbyte_d16 v2, off, off ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0x7f,0x02] +// GFX942: scratch_load_sbyte_d16 v2, off, off ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0x7f,0x02] scratch_load_sbyte_d16 v2, off, off offset:16 -// GFX940: scratch_load_sbyte_d16 v2, off, off offset:16 ; encoding: [0x10,0x40,0x88,0xdc,0x00,0x00,0x7f,0x02] +// GFX942: scratch_load_sbyte_d16 v2, off, off offset:16 ; encoding: [0x10,0x40,0x88,0xdc,0x00,0x00,0x7f,0x02] scratch_load_sbyte_d16_hi a2, v4, s6 -// GFX940: scratch_load_sbyte_d16_hi a2, v4, s6 ; encoding: [0x00,0x60,0x8c,0xdc,0x04,0x00,0x86,0x02] +// GFX942: scratch_load_sbyte_d16_hi a2, v4, s6 ; encoding: [0x00,0x60,0x8c,0xdc,0x04,0x00,0x86,0x02] scratch_load_sbyte_d16_hi a2, v4, s6 offset:16 -// GFX940: scratch_load_sbyte_d16_hi a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x8c,0xdc,0x04,0x00,0x86,0x02] +// GFX942: scratch_load_sbyte_d16_hi a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x8c,0xdc,0x04,0x00,0x86,0x02] scratch_load_sbyte_d16_hi a2, v4, off -// GFX940: scratch_load_sbyte_d16_hi a2, v4, off ; encoding: [0x00,0x60,0x8c,0xdc,0x04,0x00,0xff,0x02] +// GFX942: scratch_load_sbyte_d16_hi a2, v4, off ; encoding: [0x00,0x60,0x8c,0xdc,0x04,0x00,0xff,0x02] scratch_load_sbyte_d16_hi a2, v4, off offset:16 -// GFX940: scratch_load_sbyte_d16_hi a2, v4, off offset:16 ; encoding: [0x10,0x60,0x8c,0xdc,0x04,0x00,0xff,0x02] +// GFX942: scratch_load_sbyte_d16_hi a2, v4, off offset:16 ; encoding: [0x10,0x60,0x8c,0xdc,0x04,0x00,0xff,0x02] scratch_load_sbyte_d16_hi a2, off, s6 -// GFX940: scratch_load_sbyte_d16_hi a2, off, s6 ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x86,0x02] +// GFX942: scratch_load_sbyte_d16_hi a2, off, s6 ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x86,0x02] scratch_load_sbyte_d16_hi a2, off, s6 offset:16 -// GFX940: scratch_load_sbyte_d16_hi a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x8c,0xdc,0x00,0x00,0x86,0x02] +// GFX942: scratch_load_sbyte_d16_hi a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x8c,0xdc,0x00,0x00,0x86,0x02] scratch_load_sbyte_d16_hi a2, off, off -// GFX940: scratch_load_sbyte_d16_hi a2, off, off ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0xff,0x02] +// GFX942: scratch_load_sbyte_d16_hi a2, off, off ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0xff,0x02] scratch_load_sbyte_d16_hi a2, off, off offset:16 -// GFX940: scratch_load_sbyte_d16_hi a2, off, off offset:16 ; encoding: [0x10,0x40,0x8c,0xdc,0x00,0x00,0xff,0x02] +// GFX942: scratch_load_sbyte_d16_hi a2, off, off offset:16 ; encoding: [0x10,0x40,0x8c,0xdc,0x00,0x00,0xff,0x02] scratch_load_sbyte_d16_hi v2, v4, s6 -// GFX940: scratch_load_sbyte_d16_hi v2, v4, s6 ; encoding: [0x00,0x60,0x8c,0xdc,0x04,0x00,0x06,0x02] +// GFX942: scratch_load_sbyte_d16_hi v2, v4, s6 ; encoding: [0x00,0x60,0x8c,0xdc,0x04,0x00,0x06,0x02] scratch_load_sbyte_d16_hi v2, v4, s6 offset:16 -// GFX940: scratch_load_sbyte_d16_hi v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x8c,0xdc,0x04,0x00,0x06,0x02] +// GFX942: scratch_load_sbyte_d16_hi v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x8c,0xdc,0x04,0x00,0x06,0x02] scratch_load_sbyte_d16_hi v2, v4, off -// GFX940: scratch_load_sbyte_d16_hi v2, v4, off ; encoding: [0x00,0x60,0x8c,0xdc,0x04,0x00,0x7f,0x02] +// GFX942: scratch_load_sbyte_d16_hi v2, v4, off ; encoding: [0x00,0x60,0x8c,0xdc,0x04,0x00,0x7f,0x02] scratch_load_sbyte_d16_hi v2, v4, off offset:16 -// GFX940: scratch_load_sbyte_d16_hi v2, v4, off offset:16 ; encoding: [0x10,0x60,0x8c,0xdc,0x04,0x00,0x7f,0x02] +// GFX942: scratch_load_sbyte_d16_hi v2, v4, off offset:16 ; encoding: [0x10,0x60,0x8c,0xdc,0x04,0x00,0x7f,0x02] scratch_load_sbyte_d16_hi v2, off, s6 -// GFX940: scratch_load_sbyte_d16_hi v2, off, s6 ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x06,0x02] +// GFX942: scratch_load_sbyte_d16_hi v2, off, s6 ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x06,0x02] scratch_load_sbyte_d16_hi v2, off, s6 offset:16 -// GFX940: scratch_load_sbyte_d16_hi v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x8c,0xdc,0x00,0x00,0x06,0x02] +// GFX942: scratch_load_sbyte_d16_hi v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x8c,0xdc,0x00,0x00,0x06,0x02] scratch_load_sbyte_d16_hi v2, off, off -// GFX940: scratch_load_sbyte_d16_hi v2, off, off ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x7f,0x02] +// GFX942: scratch_load_sbyte_d16_hi v2, off, off ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x7f,0x02] scratch_load_sbyte_d16_hi v2, off, off offset:16 -// GFX940: scratch_load_sbyte_d16_hi v2, off, off offset:16 ; encoding: [0x10,0x40,0x8c,0xdc,0x00,0x00,0x7f,0x02] +// GFX942: scratch_load_sbyte_d16_hi v2, off, off offset:16 ; encoding: [0x10,0x40,0x8c,0xdc,0x00,0x00,0x7f,0x02] scratch_load_short_d16 a2, v4, s6 -// GFX940: scratch_load_short_d16 a2, v4, s6 ; encoding: [0x00,0x60,0x90,0xdc,0x04,0x00,0x86,0x02] +// GFX942: scratch_load_short_d16 a2, v4, s6 ; encoding: [0x00,0x60,0x90,0xdc,0x04,0x00,0x86,0x02] scratch_load_short_d16 a2, v4, s6 offset:16 -// GFX940: scratch_load_short_d16 a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x90,0xdc,0x04,0x00,0x86,0x02] +// GFX942: scratch_load_short_d16 a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x90,0xdc,0x04,0x00,0x86,0x02] scratch_load_short_d16 a2, v4, off -// GFX940: scratch_load_short_d16 a2, v4, off ; encoding: [0x00,0x60,0x90,0xdc,0x04,0x00,0xff,0x02] +// GFX942: scratch_load_short_d16 a2, v4, off ; encoding: [0x00,0x60,0x90,0xdc,0x04,0x00,0xff,0x02] scratch_load_short_d16 a2, v4, off offset:16 -// GFX940: scratch_load_short_d16 a2, v4, off offset:16 ; encoding: [0x10,0x60,0x90,0xdc,0x04,0x00,0xff,0x02] +// GFX942: scratch_load_short_d16 a2, v4, off offset:16 ; encoding: [0x10,0x60,0x90,0xdc,0x04,0x00,0xff,0x02] scratch_load_short_d16 a2, off, s6 -// GFX940: scratch_load_short_d16 a2, off, s6 ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0x86,0x02] +// GFX942: scratch_load_short_d16 a2, off, s6 ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0x86,0x02] scratch_load_short_d16 a2, off, s6 offset:16 -// GFX940: scratch_load_short_d16 a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x90,0xdc,0x00,0x00,0x86,0x02] +// GFX942: scratch_load_short_d16 a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x90,0xdc,0x00,0x00,0x86,0x02] scratch_load_short_d16 a2, off, off -// GFX940: scratch_load_short_d16 a2, off, off ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0xff,0x02] +// GFX942: scratch_load_short_d16 a2, off, off ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0xff,0x02] scratch_load_short_d16 a2, off, off offset:16 -// GFX940: scratch_load_short_d16 a2, off, off offset:16 ; encoding: [0x10,0x40,0x90,0xdc,0x00,0x00,0xff,0x02] +// GFX942: scratch_load_short_d16 a2, off, off offset:16 ; encoding: [0x10,0x40,0x90,0xdc,0x00,0x00,0xff,0x02] scratch_load_short_d16 v2, v4, s6 -// GFX940: scratch_load_short_d16 v2, v4, s6 ; encoding: [0x00,0x60,0x90,0xdc,0x04,0x00,0x06,0x02] +// GFX942: scratch_load_short_d16 v2, v4, s6 ; encoding: [0x00,0x60,0x90,0xdc,0x04,0x00,0x06,0x02] scratch_load_short_d16 v2, v4, s6 offset:16 -// GFX940: scratch_load_short_d16 v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x90,0xdc,0x04,0x00,0x06,0x02] +// GFX942: scratch_load_short_d16 v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x90,0xdc,0x04,0x00,0x06,0x02] scratch_load_short_d16 v2, v4, off -// GFX940: scratch_load_short_d16 v2, v4, off ; encoding: [0x00,0x60,0x90,0xdc,0x04,0x00,0x7f,0x02] +// GFX942: scratch_load_short_d16 v2, v4, off ; encoding: [0x00,0x60,0x90,0xdc,0x04,0x00,0x7f,0x02] scratch_load_short_d16 v2, v4, off offset:16 -// GFX940: scratch_load_short_d16 v2, v4, off offset:16 ; encoding: [0x10,0x60,0x90,0xdc,0x04,0x00,0x7f,0x02] +// GFX942: scratch_load_short_d16 v2, v4, off offset:16 ; encoding: [0x10,0x60,0x90,0xdc,0x04,0x00,0x7f,0x02] scratch_load_short_d16 v2, off, s6 -// GFX940: scratch_load_short_d16 v2, off, s6 ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0x06,0x02] +// GFX942: scratch_load_short_d16 v2, off, s6 ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0x06,0x02] scratch_load_short_d16 v2, off, s6 offset:16 -// GFX940: scratch_load_short_d16 v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x90,0xdc,0x00,0x00,0x06,0x02] +// GFX942: scratch_load_short_d16 v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x90,0xdc,0x00,0x00,0x06,0x02] scratch_load_short_d16 v2, off, off -// GFX940: scratch_load_short_d16 v2, off, off ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0x7f,0x02] +// GFX942: scratch_load_short_d16 v2, off, off ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0x7f,0x02] scratch_load_short_d16 v2, off, off offset:16 -// GFX940: scratch_load_short_d16 v2, off, off offset:16 ; encoding: [0x10,0x40,0x90,0xdc,0x00,0x00,0x7f,0x02] +// GFX942: scratch_load_short_d16 v2, off, off offset:16 ; encoding: [0x10,0x40,0x90,0xdc,0x00,0x00,0x7f,0x02] scratch_load_short_d16_hi a2, v4, s6 -// GFX940: scratch_load_short_d16_hi a2, v4, s6 ; encoding: [0x00,0x60,0x94,0xdc,0x04,0x00,0x86,0x02] +// GFX942: scratch_load_short_d16_hi a2, v4, s6 ; encoding: [0x00,0x60,0x94,0xdc,0x04,0x00,0x86,0x02] scratch_load_short_d16_hi a2, v4, s6 offset:16 -// GFX940: scratch_load_short_d16_hi a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x94,0xdc,0x04,0x00,0x86,0x02] +// GFX942: scratch_load_short_d16_hi a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x94,0xdc,0x04,0x00,0x86,0x02] scratch_load_short_d16_hi a2, v4, off -// GFX940: scratch_load_short_d16_hi a2, v4, off ; encoding: [0x00,0x60,0x94,0xdc,0x04,0x00,0xff,0x02] +// GFX942: scratch_load_short_d16_hi a2, v4, off ; encoding: [0x00,0x60,0x94,0xdc,0x04,0x00,0xff,0x02] scratch_load_short_d16_hi a2, v4, off offset:16 -// GFX940: scratch_load_short_d16_hi a2, v4, off offset:16 ; encoding: [0x10,0x60,0x94,0xdc,0x04,0x00,0xff,0x02] +// GFX942: scratch_load_short_d16_hi a2, v4, off offset:16 ; encoding: [0x10,0x60,0x94,0xdc,0x04,0x00,0xff,0x02] scratch_load_short_d16_hi a2, off, s6 -// GFX940: scratch_load_short_d16_hi a2, off, s6 ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0x86,0x02] +// GFX942: scratch_load_short_d16_hi a2, off, s6 ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0x86,0x02] scratch_load_short_d16_hi a2, off, s6 offset:16 -// GFX940: scratch_load_short_d16_hi a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x94,0xdc,0x00,0x00,0x86,0x02] +// GFX942: scratch_load_short_d16_hi a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x94,0xdc,0x00,0x00,0x86,0x02] scratch_load_short_d16_hi a2, off, off -// GFX940: scratch_load_short_d16_hi a2, off, off ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0xff,0x02] +// GFX942: scratch_load_short_d16_hi a2, off, off ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0xff,0x02] scratch_load_short_d16_hi a2, off, off offset:16 -// GFX940: scratch_load_short_d16_hi a2, off, off offset:16 ; encoding: [0x10,0x40,0x94,0xdc,0x00,0x00,0xff,0x02] +// GFX942: scratch_load_short_d16_hi a2, off, off offset:16 ; encoding: [0x10,0x40,0x94,0xdc,0x00,0x00,0xff,0x02] scratch_load_short_d16_hi v2, v4, s6 -// GFX940: scratch_load_short_d16_hi v2, v4, s6 ; encoding: [0x00,0x60,0x94,0xdc,0x04,0x00,0x06,0x02] +// GFX942: scratch_load_short_d16_hi v2, v4, s6 ; encoding: [0x00,0x60,0x94,0xdc,0x04,0x00,0x06,0x02] scratch_load_short_d16_hi v2, v4, s6 offset:16 -// GFX940: scratch_load_short_d16_hi v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x94,0xdc,0x04,0x00,0x06,0x02] +// GFX942: scratch_load_short_d16_hi v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x94,0xdc,0x04,0x00,0x06,0x02] scratch_load_short_d16_hi v2, v4, off -// GFX940: scratch_load_short_d16_hi v2, v4, off ; encoding: [0x00,0x60,0x94,0xdc,0x04,0x00,0x7f,0x02] +// GFX942: scratch_load_short_d16_hi v2, v4, off ; encoding: [0x00,0x60,0x94,0xdc,0x04,0x00,0x7f,0x02] scratch_load_short_d16_hi v2, v4, off offset:16 -// GFX940: scratch_load_short_d16_hi v2, v4, off offset:16 ; encoding: [0x10,0x60,0x94,0xdc,0x04,0x00,0x7f,0x02] +// GFX942: scratch_load_short_d16_hi v2, v4, off offset:16 ; encoding: [0x10,0x60,0x94,0xdc,0x04,0x00,0x7f,0x02] scratch_load_short_d16_hi v2, off, s6 -// GFX940: scratch_load_short_d16_hi v2, off, s6 ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0x06,0x02] +// GFX942: scratch_load_short_d16_hi v2, off, s6 ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0x06,0x02] scratch_load_short_d16_hi v2, off, s6 offset:16 -// GFX940: scratch_load_short_d16_hi v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x94,0xdc,0x00,0x00,0x06,0x02] +// GFX942: scratch_load_short_d16_hi v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x94,0xdc,0x00,0x00,0x06,0x02] scratch_load_short_d16_hi v2, off, off -// GFX940: scratch_load_short_d16_hi v2, off, off ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0x7f,0x02] +// GFX942: scratch_load_short_d16_hi v2, off, off ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0x7f,0x02] scratch_load_short_d16_hi v2, off, off offset:16 -// GFX940: scratch_load_short_d16_hi v2, off, off offset:16 ; encoding: [0x10,0x40,0x94,0xdc,0x00,0x00,0x7f,0x02] +// GFX942: scratch_load_short_d16_hi v2, off, off offset:16 ; encoding: [0x10,0x40,0x94,0xdc,0x00,0x00,0x7f,0x02] scratch_load_sshort a2, v4, s6 -// GFX940: scratch_load_sshort a2, v4, s6 ; encoding: [0x00,0x60,0x4c,0xdc,0x04,0x00,0x86,0x02] +// GFX942: scratch_load_sshort a2, v4, s6 ; encoding: [0x00,0x60,0x4c,0xdc,0x04,0x00,0x86,0x02] scratch_load_sshort a2, v4, s6 offset:16 -// GFX940: scratch_load_sshort a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x4c,0xdc,0x04,0x00,0x86,0x02] +// GFX942: scratch_load_sshort a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x4c,0xdc,0x04,0x00,0x86,0x02] scratch_load_sshort a2, v4, off -// GFX940: scratch_load_sshort a2, v4, off ; encoding: [0x00,0x60,0x4c,0xdc,0x04,0x00,0xff,0x02] +// GFX942: scratch_load_sshort a2, v4, off ; encoding: [0x00,0x60,0x4c,0xdc,0x04,0x00,0xff,0x02] scratch_load_sshort a2, v4, off offset:16 -// GFX940: scratch_load_sshort a2, v4, off offset:16 ; encoding: [0x10,0x60,0x4c,0xdc,0x04,0x00,0xff,0x02] +// GFX942: scratch_load_sshort a2, v4, off offset:16 ; encoding: [0x10,0x60,0x4c,0xdc,0x04,0x00,0xff,0x02] scratch_load_sshort a2, off, s6 -// GFX940: scratch_load_sshort a2, off, s6 ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x86,0x02] +// GFX942: scratch_load_sshort a2, off, s6 ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x86,0x02] scratch_load_sshort a2, off, s6 offset:16 -// GFX940: scratch_load_sshort a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x4c,0xdc,0x00,0x00,0x86,0x02] +// GFX942: scratch_load_sshort a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x4c,0xdc,0x00,0x00,0x86,0x02] scratch_load_sshort a2, off, off -// GFX940: scratch_load_sshort a2, off, off ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0xff,0x02] +// GFX942: scratch_load_sshort a2, off, off ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0xff,0x02] scratch_load_sshort a2, off, off offset:16 -// GFX940: scratch_load_sshort a2, off, off offset:16 ; encoding: [0x10,0x40,0x4c,0xdc,0x00,0x00,0xff,0x02] +// GFX942: scratch_load_sshort a2, off, off offset:16 ; encoding: [0x10,0x40,0x4c,0xdc,0x00,0x00,0xff,0x02] scratch_load_sshort v2, v4, s6 -// GFX940: scratch_load_sshort v2, v4, s6 ; encoding: [0x00,0x60,0x4c,0xdc,0x04,0x00,0x06,0x02] +// GFX942: scratch_load_sshort v2, v4, s6 ; encoding: [0x00,0x60,0x4c,0xdc,0x04,0x00,0x06,0x02] scratch_load_sshort v2, v4, s6 offset:16 -// GFX940: scratch_load_sshort v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x4c,0xdc,0x04,0x00,0x06,0x02] +// GFX942: scratch_load_sshort v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x4c,0xdc,0x04,0x00,0x06,0x02] scratch_load_sshort v2, v4, off -// GFX940: scratch_load_sshort v2, v4, off ; encoding: [0x00,0x60,0x4c,0xdc,0x04,0x00,0x7f,0x02] +// GFX942: scratch_load_sshort v2, v4, off ; encoding: [0x00,0x60,0x4c,0xdc,0x04,0x00,0x7f,0x02] scratch_load_sshort v2, v4, off offset:16 -// GFX940: scratch_load_sshort v2, v4, off offset:16 ; encoding: [0x10,0x60,0x4c,0xdc,0x04,0x00,0x7f,0x02] +// GFX942: scratch_load_sshort v2, v4, off offset:16 ; encoding: [0x10,0x60,0x4c,0xdc,0x04,0x00,0x7f,0x02] scratch_load_sshort v2, off, s6 -// GFX940: scratch_load_sshort v2, off, s6 ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x06,0x02] +// GFX942: scratch_load_sshort v2, off, s6 ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x06,0x02] scratch_load_sshort v2, off, s6 offset:16 -// GFX940: scratch_load_sshort v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x4c,0xdc,0x00,0x00,0x06,0x02] +// GFX942: scratch_load_sshort v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x4c,0xdc,0x00,0x00,0x06,0x02] scratch_load_sshort v2, off, off -// GFX940: scratch_load_sshort v2, off, off ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x7f,0x02] +// GFX942: scratch_load_sshort v2, off, off ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x7f,0x02] scratch_load_sshort v2, off, off offset:16 -// GFX940: scratch_load_sshort v2, off, off offset:16 ; encoding: [0x10,0x40,0x4c,0xdc,0x00,0x00,0x7f,0x02] +// GFX942: scratch_load_sshort v2, off, off offset:16 ; encoding: [0x10,0x40,0x4c,0xdc,0x00,0x00,0x7f,0x02] scratch_load_ubyte a2, v4, s6 -// GFX940: scratch_load_ubyte a2, v4, s6 ; encoding: [0x00,0x60,0x40,0xdc,0x04,0x00,0x86,0x02] +// GFX942: scratch_load_ubyte a2, v4, s6 ; encoding: [0x00,0x60,0x40,0xdc,0x04,0x00,0x86,0x02] scratch_load_ubyte a2, v4, s6 offset:16 -// GFX940: scratch_load_ubyte a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x40,0xdc,0x04,0x00,0x86,0x02] +// GFX942: scratch_load_ubyte a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x40,0xdc,0x04,0x00,0x86,0x02] scratch_load_ubyte a2, v4, off -// GFX940: scratch_load_ubyte a2, v4, off ; encoding: [0x00,0x60,0x40,0xdc,0x04,0x00,0xff,0x02] +// GFX942: scratch_load_ubyte a2, v4, off ; encoding: [0x00,0x60,0x40,0xdc,0x04,0x00,0xff,0x02] scratch_load_ubyte a2, v4, off offset:16 -// GFX940: scratch_load_ubyte a2, v4, off offset:16 ; encoding: [0x10,0x60,0x40,0xdc,0x04,0x00,0xff,0x02] +// GFX942: scratch_load_ubyte a2, v4, off offset:16 ; encoding: [0x10,0x60,0x40,0xdc,0x04,0x00,0xff,0x02] scratch_load_ubyte a2, off, s6 -// GFX940: scratch_load_ubyte a2, off, s6 ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0x86,0x02] +// GFX942: scratch_load_ubyte a2, off, s6 ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0x86,0x02] scratch_load_ubyte a2, off, s6 offset:16 -// GFX940: scratch_load_ubyte a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x40,0xdc,0x00,0x00,0x86,0x02] +// GFX942: scratch_load_ubyte a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x40,0xdc,0x00,0x00,0x86,0x02] scratch_load_ubyte a2, off, off -// GFX940: scratch_load_ubyte a2, off, off ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0xff,0x02] +// GFX942: scratch_load_ubyte a2, off, off ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0xff,0x02] scratch_load_ubyte a2, off, off offset:16 -// GFX940: scratch_load_ubyte a2, off, off offset:16 ; encoding: [0x10,0x40,0x40,0xdc,0x00,0x00,0xff,0x02] +// GFX942: scratch_load_ubyte a2, off, off offset:16 ; encoding: [0x10,0x40,0x40,0xdc,0x00,0x00,0xff,0x02] scratch_load_ubyte v2, v4, s6 -// GFX940: scratch_load_ubyte v2, v4, s6 ; encoding: [0x00,0x60,0x40,0xdc,0x04,0x00,0x06,0x02] +// GFX942: scratch_load_ubyte v2, v4, s6 ; encoding: [0x00,0x60,0x40,0xdc,0x04,0x00,0x06,0x02] scratch_load_ubyte v2, v4, s6 offset:16 -// GFX940: scratch_load_ubyte v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x40,0xdc,0x04,0x00,0x06,0x02] +// GFX942: scratch_load_ubyte v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x40,0xdc,0x04,0x00,0x06,0x02] scratch_load_ubyte v2, v4, off -// GFX940: scratch_load_ubyte v2, v4, off ; encoding: [0x00,0x60,0x40,0xdc,0x04,0x00,0x7f,0x02] +// GFX942: scratch_load_ubyte v2, v4, off ; encoding: [0x00,0x60,0x40,0xdc,0x04,0x00,0x7f,0x02] scratch_load_ubyte v2, v4, off offset:16 -// GFX940: scratch_load_ubyte v2, v4, off offset:16 ; encoding: [0x10,0x60,0x40,0xdc,0x04,0x00,0x7f,0x02] +// GFX942: scratch_load_ubyte v2, v4, off offset:16 ; encoding: [0x10,0x60,0x40,0xdc,0x04,0x00,0x7f,0x02] scratch_load_ubyte v2, off, s6 -// GFX940: scratch_load_ubyte v2, off, s6 ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0x06,0x02] +// GFX942: scratch_load_ubyte v2, off, s6 ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0x06,0x02] scratch_load_ubyte v2, off, s6 offset:16 -// GFX940: scratch_load_ubyte v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x40,0xdc,0x00,0x00,0x06,0x02] +// GFX942: scratch_load_ubyte v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x40,0xdc,0x00,0x00,0x06,0x02] scratch_load_ubyte v2, off, off -// GFX940: scratch_load_ubyte v2, off, off ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0x7f,0x02] +// GFX942: scratch_load_ubyte v2, off, off ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0x7f,0x02] scratch_load_ubyte v2, off, off offset:16 -// GFX940: scratch_load_ubyte v2, off, off offset:16 ; encoding: [0x10,0x40,0x40,0xdc,0x00,0x00,0x7f,0x02] +// GFX942: scratch_load_ubyte v2, off, off offset:16 ; encoding: [0x10,0x40,0x40,0xdc,0x00,0x00,0x7f,0x02] scratch_load_ubyte_d16 a2, v4, s6 -// GFX940: scratch_load_ubyte_d16 a2, v4, s6 ; encoding: [0x00,0x60,0x80,0xdc,0x04,0x00,0x86,0x02] +// GFX942: scratch_load_ubyte_d16 a2, v4, s6 ; encoding: [0x00,0x60,0x80,0xdc,0x04,0x00,0x86,0x02] scratch_load_ubyte_d16 a2, v4, s6 offset:16 -// GFX940: scratch_load_ubyte_d16 a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x80,0xdc,0x04,0x00,0x86,0x02] +// GFX942: scratch_load_ubyte_d16 a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x80,0xdc,0x04,0x00,0x86,0x02] scratch_load_ubyte_d16 a2, v4, off -// GFX940: scratch_load_ubyte_d16 a2, v4, off ; encoding: [0x00,0x60,0x80,0xdc,0x04,0x00,0xff,0x02] +// GFX942: scratch_load_ubyte_d16 a2, v4, off ; encoding: [0x00,0x60,0x80,0xdc,0x04,0x00,0xff,0x02] scratch_load_ubyte_d16 a2, v4, off offset:16 -// GFX940: scratch_load_ubyte_d16 a2, v4, off offset:16 ; encoding: [0x10,0x60,0x80,0xdc,0x04,0x00,0xff,0x02] +// GFX942: scratch_load_ubyte_d16 a2, v4, off offset:16 ; encoding: [0x10,0x60,0x80,0xdc,0x04,0x00,0xff,0x02] scratch_load_ubyte_d16 a2, off, s6 -// GFX940: scratch_load_ubyte_d16 a2, off, s6 ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0x86,0x02] +// GFX942: scratch_load_ubyte_d16 a2, off, s6 ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0x86,0x02] scratch_load_ubyte_d16 a2, off, s6 offset:16 -// GFX940: scratch_load_ubyte_d16 a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x80,0xdc,0x00,0x00,0x86,0x02] +// GFX942: scratch_load_ubyte_d16 a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x80,0xdc,0x00,0x00,0x86,0x02] scratch_load_ubyte_d16 a2, off, off -// GFX940: scratch_load_ubyte_d16 a2, off, off ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0xff,0x02] +// GFX942: scratch_load_ubyte_d16 a2, off, off ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0xff,0x02] scratch_load_ubyte_d16 a2, off, off offset:16 -// GFX940: scratch_load_ubyte_d16 a2, off, off offset:16 ; encoding: [0x10,0x40,0x80,0xdc,0x00,0x00,0xff,0x02] +// GFX942: scratch_load_ubyte_d16 a2, off, off offset:16 ; encoding: [0x10,0x40,0x80,0xdc,0x00,0x00,0xff,0x02] scratch_load_ubyte_d16 v2, v4, s6 -// GFX940: scratch_load_ubyte_d16 v2, v4, s6 ; encoding: [0x00,0x60,0x80,0xdc,0x04,0x00,0x06,0x02] +// GFX942: scratch_load_ubyte_d16 v2, v4, s6 ; encoding: [0x00,0x60,0x80,0xdc,0x04,0x00,0x06,0x02] scratch_load_ubyte_d16 v2, v4, s6 offset:16 -// GFX940: scratch_load_ubyte_d16 v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x80,0xdc,0x04,0x00,0x06,0x02] +// GFX942: scratch_load_ubyte_d16 v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x80,0xdc,0x04,0x00,0x06,0x02] scratch_load_ubyte_d16 v2, v4, off -// GFX940: scratch_load_ubyte_d16 v2, v4, off ; encoding: [0x00,0x60,0x80,0xdc,0x04,0x00,0x7f,0x02] +// GFX942: scratch_load_ubyte_d16 v2, v4, off ; encoding: [0x00,0x60,0x80,0xdc,0x04,0x00,0x7f,0x02] scratch_load_ubyte_d16 v2, v4, off offset:16 -// GFX940: scratch_load_ubyte_d16 v2, v4, off offset:16 ; encoding: [0x10,0x60,0x80,0xdc,0x04,0x00,0x7f,0x02] +// GFX942: scratch_load_ubyte_d16 v2, v4, off offset:16 ; encoding: [0x10,0x60,0x80,0xdc,0x04,0x00,0x7f,0x02] scratch_load_ubyte_d16 v2, off, s6 -// GFX940: scratch_load_ubyte_d16 v2, off, s6 ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0x06,0x02] +// GFX942: scratch_load_ubyte_d16 v2, off, s6 ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0x06,0x02] scratch_load_ubyte_d16 v2, off, s6 offset:16 -// GFX940: scratch_load_ubyte_d16 v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x80,0xdc,0x00,0x00,0x06,0x02] +// GFX942: scratch_load_ubyte_d16 v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x80,0xdc,0x00,0x00,0x06,0x02] scratch_load_ubyte_d16 v2, off, off -// GFX940: scratch_load_ubyte_d16 v2, off, off ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0x7f,0x02] +// GFX942: scratch_load_ubyte_d16 v2, off, off ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0x7f,0x02] scratch_load_ubyte_d16 v2, off, off offset:16 -// GFX940: scratch_load_ubyte_d16 v2, off, off offset:16 ; encoding: [0x10,0x40,0x80,0xdc,0x00,0x00,0x7f,0x02] +// GFX942: scratch_load_ubyte_d16 v2, off, off offset:16 ; encoding: [0x10,0x40,0x80,0xdc,0x00,0x00,0x7f,0x02] scratch_load_ubyte_d16_hi a2, v4, s6 -// GFX940: scratch_load_ubyte_d16_hi a2, v4, s6 ; encoding: [0x00,0x60,0x84,0xdc,0x04,0x00,0x86,0x02] +// GFX942: scratch_load_ubyte_d16_hi a2, v4, s6 ; encoding: [0x00,0x60,0x84,0xdc,0x04,0x00,0x86,0x02] scratch_load_ubyte_d16_hi a2, v4, s6 offset:16 -// GFX940: scratch_load_ubyte_d16_hi a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x84,0xdc,0x04,0x00,0x86,0x02] +// GFX942: scratch_load_ubyte_d16_hi a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x84,0xdc,0x04,0x00,0x86,0x02] scratch_load_ubyte_d16_hi a2, v4, off -// GFX940: scratch_load_ubyte_d16_hi a2, v4, off ; encoding: [0x00,0x60,0x84,0xdc,0x04,0x00,0xff,0x02] +// GFX942: scratch_load_ubyte_d16_hi a2, v4, off ; encoding: [0x00,0x60,0x84,0xdc,0x04,0x00,0xff,0x02] scratch_load_ubyte_d16_hi a2, v4, off offset:16 -// GFX940: scratch_load_ubyte_d16_hi a2, v4, off offset:16 ; encoding: [0x10,0x60,0x84,0xdc,0x04,0x00,0xff,0x02] +// GFX942: scratch_load_ubyte_d16_hi a2, v4, off offset:16 ; encoding: [0x10,0x60,0x84,0xdc,0x04,0x00,0xff,0x02] scratch_load_ubyte_d16_hi a2, off, s6 -// GFX940: scratch_load_ubyte_d16_hi a2, off, s6 ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0x86,0x02] +// GFX942: scratch_load_ubyte_d16_hi a2, off, s6 ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0x86,0x02] scratch_load_ubyte_d16_hi a2, off, s6 offset:16 -// GFX940: scratch_load_ubyte_d16_hi a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x84,0xdc,0x00,0x00,0x86,0x02] +// GFX942: scratch_load_ubyte_d16_hi a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x84,0xdc,0x00,0x00,0x86,0x02] scratch_load_ubyte_d16_hi a2, off, off -// GFX940: scratch_load_ubyte_d16_hi a2, off, off ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0xff,0x02] +// GFX942: scratch_load_ubyte_d16_hi a2, off, off ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0xff,0x02] scratch_load_ubyte_d16_hi a2, off, off offset:16 -// GFX940: scratch_load_ubyte_d16_hi a2, off, off offset:16 ; encoding: [0x10,0x40,0x84,0xdc,0x00,0x00,0xff,0x02] +// GFX942: scratch_load_ubyte_d16_hi a2, off, off offset:16 ; encoding: [0x10,0x40,0x84,0xdc,0x00,0x00,0xff,0x02] scratch_load_ubyte_d16_hi v2, v4, s6 -// GFX940: scratch_load_ubyte_d16_hi v2, v4, s6 ; encoding: [0x00,0x60,0x84,0xdc,0x04,0x00,0x06,0x02] +// GFX942: scratch_load_ubyte_d16_hi v2, v4, s6 ; encoding: [0x00,0x60,0x84,0xdc,0x04,0x00,0x06,0x02] scratch_load_ubyte_d16_hi v2, v4, s6 offset:16 -// GFX940: scratch_load_ubyte_d16_hi v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x84,0xdc,0x04,0x00,0x06,0x02] +// GFX942: scratch_load_ubyte_d16_hi v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x84,0xdc,0x04,0x00,0x06,0x02] scratch_load_ubyte_d16_hi v2, v4, off -// GFX940: scratch_load_ubyte_d16_hi v2, v4, off ; encoding: [0x00,0x60,0x84,0xdc,0x04,0x00,0x7f,0x02] +// GFX942: scratch_load_ubyte_d16_hi v2, v4, off ; encoding: [0x00,0x60,0x84,0xdc,0x04,0x00,0x7f,0x02] scratch_load_ubyte_d16_hi v2, v4, off offset:16 -// GFX940: scratch_load_ubyte_d16_hi v2, v4, off offset:16 ; encoding: [0x10,0x60,0x84,0xdc,0x04,0x00,0x7f,0x02] +// GFX942: scratch_load_ubyte_d16_hi v2, v4, off offset:16 ; encoding: [0x10,0x60,0x84,0xdc,0x04,0x00,0x7f,0x02] scratch_load_ubyte_d16_hi v2, off, s6 -// GFX940: scratch_load_ubyte_d16_hi v2, off, s6 ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0x06,0x02] +// GFX942: scratch_load_ubyte_d16_hi v2, off, s6 ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0x06,0x02] scratch_load_ubyte_d16_hi v2, off, s6 offset:16 -// GFX940: scratch_load_ubyte_d16_hi v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x84,0xdc,0x00,0x00,0x06,0x02] +// GFX942: scratch_load_ubyte_d16_hi v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x84,0xdc,0x00,0x00,0x06,0x02] scratch_load_ubyte_d16_hi v2, off, off -// GFX940: scratch_load_ubyte_d16_hi v2, off, off ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0x7f,0x02] +// GFX942: scratch_load_ubyte_d16_hi v2, off, off ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0x7f,0x02] scratch_load_ubyte_d16_hi v2, off, off offset:16 -// GFX940: scratch_load_ubyte_d16_hi v2, off, off offset:16 ; encoding: [0x10,0x40,0x84,0xdc,0x00,0x00,0x7f,0x02] +// GFX942: scratch_load_ubyte_d16_hi v2, off, off offset:16 ; encoding: [0x10,0x40,0x84,0xdc,0x00,0x00,0x7f,0x02] scratch_load_ushort a2, v4, s6 -// GFX940: scratch_load_ushort a2, v4, s6 ; encoding: [0x00,0x60,0x48,0xdc,0x04,0x00,0x86,0x02] +// GFX942: scratch_load_ushort a2, v4, s6 ; encoding: [0x00,0x60,0x48,0xdc,0x04,0x00,0x86,0x02] scratch_load_ushort a2, v4, s6 offset:16 -// GFX940: scratch_load_ushort a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x48,0xdc,0x04,0x00,0x86,0x02] +// GFX942: scratch_load_ushort a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x48,0xdc,0x04,0x00,0x86,0x02] scratch_load_ushort a2, v4, off -// GFX940: scratch_load_ushort a2, v4, off ; encoding: [0x00,0x60,0x48,0xdc,0x04,0x00,0xff,0x02] +// GFX942: scratch_load_ushort a2, v4, off ; encoding: [0x00,0x60,0x48,0xdc,0x04,0x00,0xff,0x02] scratch_load_ushort a2, v4, off offset:16 -// GFX940: scratch_load_ushort a2, v4, off offset:16 ; encoding: [0x10,0x60,0x48,0xdc,0x04,0x00,0xff,0x02] +// GFX942: scratch_load_ushort a2, v4, off offset:16 ; encoding: [0x10,0x60,0x48,0xdc,0x04,0x00,0xff,0x02] scratch_load_ushort a2, off, s6 -// GFX940: scratch_load_ushort a2, off, s6 ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0x86,0x02] +// GFX942: scratch_load_ushort a2, off, s6 ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0x86,0x02] scratch_load_ushort a2, off, s6 offset:16 -// GFX940: scratch_load_ushort a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x48,0xdc,0x00,0x00,0x86,0x02] +// GFX942: scratch_load_ushort a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x48,0xdc,0x00,0x00,0x86,0x02] scratch_load_ushort a2, off, off -// GFX940: scratch_load_ushort a2, off, off ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0xff,0x02] +// GFX942: scratch_load_ushort a2, off, off ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0xff,0x02] scratch_load_ushort a2, off, off offset:16 -// GFX940: scratch_load_ushort a2, off, off offset:16 ; encoding: [0x10,0x40,0x48,0xdc,0x00,0x00,0xff,0x02] +// GFX942: scratch_load_ushort a2, off, off offset:16 ; encoding: [0x10,0x40,0x48,0xdc,0x00,0x00,0xff,0x02] scratch_load_ushort v2, v4, s6 -// GFX940: scratch_load_ushort v2, v4, s6 ; encoding: [0x00,0x60,0x48,0xdc,0x04,0x00,0x06,0x02] +// GFX942: scratch_load_ushort v2, v4, s6 ; encoding: [0x00,0x60,0x48,0xdc,0x04,0x00,0x06,0x02] scratch_load_ushort v2, v4, s6 offset:16 -// GFX940: scratch_load_ushort v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x48,0xdc,0x04,0x00,0x06,0x02] +// GFX942: scratch_load_ushort v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x48,0xdc,0x04,0x00,0x06,0x02] scratch_load_ushort v2, v4, off -// GFX940: scratch_load_ushort v2, v4, off ; encoding: [0x00,0x60,0x48,0xdc,0x04,0x00,0x7f,0x02] +// GFX942: scratch_load_ushort v2, v4, off ; encoding: [0x00,0x60,0x48,0xdc,0x04,0x00,0x7f,0x02] scratch_load_ushort v2, v4, off offset:16 -// GFX940: scratch_load_ushort v2, v4, off offset:16 ; encoding: [0x10,0x60,0x48,0xdc,0x04,0x00,0x7f,0x02] +// GFX942: scratch_load_ushort v2, v4, off offset:16 ; encoding: [0x10,0x60,0x48,0xdc,0x04,0x00,0x7f,0x02] scratch_load_ushort v2, off, s6 -// GFX940: scratch_load_ushort v2, off, s6 ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0x06,0x02] +// GFX942: scratch_load_ushort v2, off, s6 ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0x06,0x02] scratch_load_ushort v2, off, s6 offset:16 -// GFX940: scratch_load_ushort v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x48,0xdc,0x00,0x00,0x06,0x02] +// GFX942: scratch_load_ushort v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x48,0xdc,0x00,0x00,0x06,0x02] scratch_load_ushort v2, off, off -// GFX940: scratch_load_ushort v2, off, off ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0x7f,0x02] +// GFX942: scratch_load_ushort v2, off, off ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0x7f,0x02] scratch_load_ushort v2, off, off offset:16 -// GFX940: scratch_load_ushort v2, off, off offset:16 ; encoding: [0x10,0x40,0x48,0xdc,0x00,0x00,0x7f,0x02] +// GFX942: scratch_load_ushort v2, off, off offset:16 ; encoding: [0x10,0x40,0x48,0xdc,0x00,0x00,0x7f,0x02] scratch_store_byte v4, a2, s6 -// GFX940: scratch_store_byte v4, a2, s6 ; encoding: [0x00,0x60,0x60,0xdc,0x04,0x02,0x86,0x00] +// GFX942: scratch_store_byte v4, a2, s6 ; encoding: [0x00,0x60,0x60,0xdc,0x04,0x02,0x86,0x00] scratch_store_byte v4, a2, s6 offset:16 -// GFX940: scratch_store_byte v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x60,0xdc,0x04,0x02,0x86,0x00] +// GFX942: scratch_store_byte v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x60,0xdc,0x04,0x02,0x86,0x00] scratch_store_byte v4, a2, off -// GFX940: scratch_store_byte v4, a2, off ; encoding: [0x00,0x60,0x60,0xdc,0x04,0x02,0xff,0x00] +// GFX942: scratch_store_byte v4, a2, off ; encoding: [0x00,0x60,0x60,0xdc,0x04,0x02,0xff,0x00] scratch_store_byte v4, a2, off offset:16 -// GFX940: scratch_store_byte v4, a2, off offset:16 ; encoding: [0x10,0x60,0x60,0xdc,0x04,0x02,0xff,0x00] +// GFX942: scratch_store_byte v4, a2, off offset:16 ; encoding: [0x10,0x60,0x60,0xdc,0x04,0x02,0xff,0x00] scratch_store_byte off, a2, s6 -// GFX940: scratch_store_byte off, a2, s6 ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0x86,0x00] +// GFX942: scratch_store_byte off, a2, s6 ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0x86,0x00] scratch_store_byte off, a2, s6 offset:16 -// GFX940: scratch_store_byte off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x60,0xdc,0x00,0x02,0x86,0x00] +// GFX942: scratch_store_byte off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x60,0xdc,0x00,0x02,0x86,0x00] scratch_store_byte off, a2, off -// GFX940: scratch_store_byte off, a2, off ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0xff,0x00] +// GFX942: scratch_store_byte off, a2, off ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0xff,0x00] scratch_store_byte off, a2, off offset:16 -// GFX940: scratch_store_byte off, a2, off offset:16 ; encoding: [0x10,0x40,0x60,0xdc,0x00,0x02,0xff,0x00] +// GFX942: scratch_store_byte off, a2, off offset:16 ; encoding: [0x10,0x40,0x60,0xdc,0x00,0x02,0xff,0x00] scratch_store_byte v4, v2, s6 -// GFX940: scratch_store_byte v4, v2, s6 ; encoding: [0x00,0x60,0x60,0xdc,0x04,0x02,0x06,0x00] +// GFX942: scratch_store_byte v4, v2, s6 ; encoding: [0x00,0x60,0x60,0xdc,0x04,0x02,0x06,0x00] scratch_store_byte v4, v2, s6 offset:16 -// GFX940: scratch_store_byte v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x60,0xdc,0x04,0x02,0x06,0x00] +// GFX942: scratch_store_byte v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x60,0xdc,0x04,0x02,0x06,0x00] scratch_store_byte v4, v2, off -// GFX940: scratch_store_byte v4, v2, off ; encoding: [0x00,0x60,0x60,0xdc,0x04,0x02,0x7f,0x00] +// GFX942: scratch_store_byte v4, v2, off ; encoding: [0x00,0x60,0x60,0xdc,0x04,0x02,0x7f,0x00] scratch_store_byte v4, v2, off offset:16 -// GFX940: scratch_store_byte v4, v2, off offset:16 ; encoding: [0x10,0x60,0x60,0xdc,0x04,0x02,0x7f,0x00] +// GFX942: scratch_store_byte v4, v2, off offset:16 ; encoding: [0x10,0x60,0x60,0xdc,0x04,0x02,0x7f,0x00] scratch_store_byte off, v2, s6 -// GFX940: scratch_store_byte off, v2, s6 ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0x06,0x00] +// GFX942: scratch_store_byte off, v2, s6 ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0x06,0x00] scratch_store_byte off, v2, s6 offset:16 -// GFX940: scratch_store_byte off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x60,0xdc,0x00,0x02,0x06,0x00] +// GFX942: scratch_store_byte off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x60,0xdc,0x00,0x02,0x06,0x00] scratch_store_byte off, v2, off -// GFX940: scratch_store_byte off, v2, off ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0x7f,0x00] +// GFX942: scratch_store_byte off, v2, off ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0x7f,0x00] scratch_store_byte off, v2, off offset:16 -// GFX940: scratch_store_byte off, v2, off offset:16 ; encoding: [0x10,0x40,0x60,0xdc,0x00,0x02,0x7f,0x00] +// GFX942: scratch_store_byte off, v2, off offset:16 ; encoding: [0x10,0x40,0x60,0xdc,0x00,0x02,0x7f,0x00] scratch_store_byte_d16_hi v4, a2, s6 -// GFX940: scratch_store_byte_d16_hi v4, a2, s6 ; encoding: [0x00,0x60,0x64,0xdc,0x04,0x02,0x86,0x00] +// GFX942: scratch_store_byte_d16_hi v4, a2, s6 ; encoding: [0x00,0x60,0x64,0xdc,0x04,0x02,0x86,0x00] scratch_store_byte_d16_hi v4, a2, s6 offset:16 -// GFX940: scratch_store_byte_d16_hi v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x64,0xdc,0x04,0x02,0x86,0x00] +// GFX942: scratch_store_byte_d16_hi v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x64,0xdc,0x04,0x02,0x86,0x00] scratch_store_byte_d16_hi v4, a2, off -// GFX940: scratch_store_byte_d16_hi v4, a2, off ; encoding: [0x00,0x60,0x64,0xdc,0x04,0x02,0xff,0x00] +// GFX942: scratch_store_byte_d16_hi v4, a2, off ; encoding: [0x00,0x60,0x64,0xdc,0x04,0x02,0xff,0x00] scratch_store_byte_d16_hi v4, a2, off offset:16 -// GFX940: scratch_store_byte_d16_hi v4, a2, off offset:16 ; encoding: [0x10,0x60,0x64,0xdc,0x04,0x02,0xff,0x00] +// GFX942: scratch_store_byte_d16_hi v4, a2, off offset:16 ; encoding: [0x10,0x60,0x64,0xdc,0x04,0x02,0xff,0x00] scratch_store_byte_d16_hi off, a2, s6 -// GFX940: scratch_store_byte_d16_hi off, a2, s6 ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x86,0x00] +// GFX942: scratch_store_byte_d16_hi off, a2, s6 ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x86,0x00] scratch_store_byte_d16_hi off, a2, s6 offset:16 -// GFX940: scratch_store_byte_d16_hi off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x64,0xdc,0x00,0x02,0x86,0x00] +// GFX942: scratch_store_byte_d16_hi off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x64,0xdc,0x00,0x02,0x86,0x00] scratch_store_byte_d16_hi off, a2, off -// GFX940: scratch_store_byte_d16_hi off, a2, off ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0xff,0x00] +// GFX942: scratch_store_byte_d16_hi off, a2, off ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0xff,0x00] scratch_store_byte_d16_hi off, a2, off offset:16 -// GFX940: scratch_store_byte_d16_hi off, a2, off offset:16 ; encoding: [0x10,0x40,0x64,0xdc,0x00,0x02,0xff,0x00] +// GFX942: scratch_store_byte_d16_hi off, a2, off offset:16 ; encoding: [0x10,0x40,0x64,0xdc,0x00,0x02,0xff,0x00] scratch_store_byte_d16_hi v4, v2, s6 -// GFX940: scratch_store_byte_d16_hi v4, v2, s6 ; encoding: [0x00,0x60,0x64,0xdc,0x04,0x02,0x06,0x00] +// GFX942: scratch_store_byte_d16_hi v4, v2, s6 ; encoding: [0x00,0x60,0x64,0xdc,0x04,0x02,0x06,0x00] scratch_store_byte_d16_hi v4, v2, s6 offset:16 -// GFX940: scratch_store_byte_d16_hi v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x64,0xdc,0x04,0x02,0x06,0x00] +// GFX942: scratch_store_byte_d16_hi v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x64,0xdc,0x04,0x02,0x06,0x00] scratch_store_byte_d16_hi v4, v2, off -// GFX940: scratch_store_byte_d16_hi v4, v2, off ; encoding: [0x00,0x60,0x64,0xdc,0x04,0x02,0x7f,0x00] +// GFX942: scratch_store_byte_d16_hi v4, v2, off ; encoding: [0x00,0x60,0x64,0xdc,0x04,0x02,0x7f,0x00] scratch_store_byte_d16_hi v4, v2, off offset:16 -// GFX940: scratch_store_byte_d16_hi v4, v2, off offset:16 ; encoding: [0x10,0x60,0x64,0xdc,0x04,0x02,0x7f,0x00] +// GFX942: scratch_store_byte_d16_hi v4, v2, off offset:16 ; encoding: [0x10,0x60,0x64,0xdc,0x04,0x02,0x7f,0x00] scratch_store_byte_d16_hi off, v2, s6 -// GFX940: scratch_store_byte_d16_hi off, v2, s6 ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x06,0x00] +// GFX942: scratch_store_byte_d16_hi off, v2, s6 ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x06,0x00] scratch_store_byte_d16_hi off, v2, s6 offset:16 -// GFX940: scratch_store_byte_d16_hi off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x64,0xdc,0x00,0x02,0x06,0x00] +// GFX942: scratch_store_byte_d16_hi off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x64,0xdc,0x00,0x02,0x06,0x00] scratch_store_byte_d16_hi off, v2, off -// GFX940: scratch_store_byte_d16_hi off, v2, off ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x7f,0x00] +// GFX942: scratch_store_byte_d16_hi off, v2, off ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x7f,0x00] scratch_store_byte_d16_hi off, v2, off offset:16 -// GFX940: scratch_store_byte_d16_hi off, v2, off offset:16 ; encoding: [0x10,0x40,0x64,0xdc,0x00,0x02,0x7f,0x00] +// GFX942: scratch_store_byte_d16_hi off, v2, off offset:16 ; encoding: [0x10,0x40,0x64,0xdc,0x00,0x02,0x7f,0x00] scratch_store_dword v4, a2, s6 -// GFX940: scratch_store_dword v4, a2, s6 ; encoding: [0x00,0x60,0x70,0xdc,0x04,0x02,0x86,0x00] +// GFX942: scratch_store_dword v4, a2, s6 ; encoding: [0x00,0x60,0x70,0xdc,0x04,0x02,0x86,0x00] scratch_store_dword v4, a2, s6 offset:16 -// GFX940: scratch_store_dword v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x70,0xdc,0x04,0x02,0x86,0x00] +// GFX942: scratch_store_dword v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x70,0xdc,0x04,0x02,0x86,0x00] scratch_store_dword v4, a2, off -// GFX940: scratch_store_dword v4, a2, off ; encoding: [0x00,0x60,0x70,0xdc,0x04,0x02,0xff,0x00] +// GFX942: scratch_store_dword v4, a2, off ; encoding: [0x00,0x60,0x70,0xdc,0x04,0x02,0xff,0x00] scratch_store_dword v4, a2, off offset:16 -// GFX940: scratch_store_dword v4, a2, off offset:16 ; encoding: [0x10,0x60,0x70,0xdc,0x04,0x02,0xff,0x00] +// GFX942: scratch_store_dword v4, a2, off offset:16 ; encoding: [0x10,0x60,0x70,0xdc,0x04,0x02,0xff,0x00] scratch_store_dword off, a2, s6 -// GFX940: scratch_store_dword off, a2, s6 ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x86,0x00] +// GFX942: scratch_store_dword off, a2, s6 ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x86,0x00] scratch_store_dword off, a2, s6 offset:16 -// GFX940: scratch_store_dword off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x70,0xdc,0x00,0x02,0x86,0x00] +// GFX942: scratch_store_dword off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x70,0xdc,0x00,0x02,0x86,0x00] scratch_store_dword off, a2, off -// GFX940: scratch_store_dword off, a2, off ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0xff,0x00] +// GFX942: scratch_store_dword off, a2, off ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0xff,0x00] scratch_store_dword off, a2, off offset:16 -// GFX940: scratch_store_dword off, a2, off offset:16 ; encoding: [0x10,0x40,0x70,0xdc,0x00,0x02,0xff,0x00] +// GFX942: scratch_store_dword off, a2, off offset:16 ; encoding: [0x10,0x40,0x70,0xdc,0x00,0x02,0xff,0x00] scratch_store_dword v4, v2, s6 -// GFX940: scratch_store_dword v4, v2, s6 ; encoding: [0x00,0x60,0x70,0xdc,0x04,0x02,0x06,0x00] +// GFX942: scratch_store_dword v4, v2, s6 ; encoding: [0x00,0x60,0x70,0xdc,0x04,0x02,0x06,0x00] scratch_store_dword v4, v2, s6 offset:16 -// GFX940: scratch_store_dword v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x70,0xdc,0x04,0x02,0x06,0x00] +// GFX942: scratch_store_dword v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x70,0xdc,0x04,0x02,0x06,0x00] scratch_store_dword v4, v2, off -// GFX940: scratch_store_dword v4, v2, off ; encoding: [0x00,0x60,0x70,0xdc,0x04,0x02,0x7f,0x00] +// GFX942: scratch_store_dword v4, v2, off ; encoding: [0x00,0x60,0x70,0xdc,0x04,0x02,0x7f,0x00] scratch_store_dword v4, v2, off offset:16 -// GFX940: scratch_store_dword v4, v2, off offset:16 ; encoding: [0x10,0x60,0x70,0xdc,0x04,0x02,0x7f,0x00] +// GFX942: scratch_store_dword v4, v2, off offset:16 ; encoding: [0x10,0x60,0x70,0xdc,0x04,0x02,0x7f,0x00] scratch_store_dword off, v2, s6 -// GFX940: scratch_store_dword off, v2, s6 ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x06,0x00] +// GFX942: scratch_store_dword off, v2, s6 ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x06,0x00] scratch_store_dword off, v2, s6 offset:16 -// GFX940: scratch_store_dword off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x70,0xdc,0x00,0x02,0x06,0x00] +// GFX942: scratch_store_dword off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x70,0xdc,0x00,0x02,0x06,0x00] scratch_store_dword off, v2, off -// GFX940: scratch_store_dword off, v2, off ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x7f,0x00] +// GFX942: scratch_store_dword off, v2, off ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x7f,0x00] scratch_store_dword off, v2, off offset:16 -// GFX940: scratch_store_dword off, v2, off offset:16 ; encoding: [0x10,0x40,0x70,0xdc,0x00,0x02,0x7f,0x00] +// GFX942: scratch_store_dword off, v2, off offset:16 ; encoding: [0x10,0x40,0x70,0xdc,0x00,0x02,0x7f,0x00] scratch_store_dwordx2 v4, a[2:3], s6 -// GFX940: scratch_store_dwordx2 v4, a[2:3], s6 ; encoding: [0x00,0x60,0x74,0xdc,0x04,0x02,0x86,0x00] +// GFX942: scratch_store_dwordx2 v4, a[2:3], s6 ; encoding: [0x00,0x60,0x74,0xdc,0x04,0x02,0x86,0x00] scratch_store_dwordx2 v4, a[2:3], s6 offset:16 -// GFX940: scratch_store_dwordx2 v4, a[2:3], s6 offset:16 ; encoding: [0x10,0x60,0x74,0xdc,0x04,0x02,0x86,0x00] +// GFX942: scratch_store_dwordx2 v4, a[2:3], s6 offset:16 ; encoding: [0x10,0x60,0x74,0xdc,0x04,0x02,0x86,0x00] scratch_store_dwordx2 v4, a[2:3], off -// GFX940: scratch_store_dwordx2 v4, a[2:3], off ; encoding: [0x00,0x60,0x74,0xdc,0x04,0x02,0xff,0x00] +// GFX942: scratch_store_dwordx2 v4, a[2:3], off ; encoding: [0x00,0x60,0x74,0xdc,0x04,0x02,0xff,0x00] scratch_store_dwordx2 v4, a[2:3], off offset:16 -// GFX940: scratch_store_dwordx2 v4, a[2:3], off offset:16 ; encoding: [0x10,0x60,0x74,0xdc,0x04,0x02,0xff,0x00] +// GFX942: scratch_store_dwordx2 v4, a[2:3], off offset:16 ; encoding: [0x10,0x60,0x74,0xdc,0x04,0x02,0xff,0x00] scratch_store_dwordx2 off, a[2:3], s6 -// GFX940: scratch_store_dwordx2 off, a[2:3], s6 ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0x86,0x00] +// GFX942: scratch_store_dwordx2 off, a[2:3], s6 ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0x86,0x00] scratch_store_dwordx2 off, a[2:3], s6 offset:16 -// GFX940: scratch_store_dwordx2 off, a[2:3], s6 offset:16 ; encoding: [0x10,0x40,0x74,0xdc,0x00,0x02,0x86,0x00] +// GFX942: scratch_store_dwordx2 off, a[2:3], s6 offset:16 ; encoding: [0x10,0x40,0x74,0xdc,0x00,0x02,0x86,0x00] scratch_store_dwordx2 off, a[2:3], off -// GFX940: scratch_store_dwordx2 off, a[2:3], off ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0xff,0x00] +// GFX942: scratch_store_dwordx2 off, a[2:3], off ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0xff,0x00] scratch_store_dwordx2 off, a[2:3], off offset:16 -// GFX940: scratch_store_dwordx2 off, a[2:3], off offset:16 ; encoding: [0x10,0x40,0x74,0xdc,0x00,0x02,0xff,0x00] +// GFX942: scratch_store_dwordx2 off, a[2:3], off offset:16 ; encoding: [0x10,0x40,0x74,0xdc,0x00,0x02,0xff,0x00] scratch_store_dwordx2 v4, v[2:3], s6 -// GFX940: scratch_store_dwordx2 v4, v[2:3], s6 ; encoding: [0x00,0x60,0x74,0xdc,0x04,0x02,0x06,0x00] +// GFX942: scratch_store_dwordx2 v4, v[2:3], s6 ; encoding: [0x00,0x60,0x74,0xdc,0x04,0x02,0x06,0x00] scratch_store_dwordx2 v4, v[2:3], s6 offset:16 -// GFX940: scratch_store_dwordx2 v4, v[2:3], s6 offset:16 ; encoding: [0x10,0x60,0x74,0xdc,0x04,0x02,0x06,0x00] +// GFX942: scratch_store_dwordx2 v4, v[2:3], s6 offset:16 ; encoding: [0x10,0x60,0x74,0xdc,0x04,0x02,0x06,0x00] scratch_store_dwordx2 v4, v[2:3], off -// GFX940: scratch_store_dwordx2 v4, v[2:3], off ; encoding: [0x00,0x60,0x74,0xdc,0x04,0x02,0x7f,0x00] +// GFX942: scratch_store_dwordx2 v4, v[2:3], off ; encoding: [0x00,0x60,0x74,0xdc,0x04,0x02,0x7f,0x00] scratch_store_dwordx2 v4, v[2:3], off offset:16 -// GFX940: scratch_store_dwordx2 v4, v[2:3], off offset:16 ; encoding: [0x10,0x60,0x74,0xdc,0x04,0x02,0x7f,0x00] +// GFX942: scratch_store_dwordx2 v4, v[2:3], off offset:16 ; encoding: [0x10,0x60,0x74,0xdc,0x04,0x02,0x7f,0x00] scratch_store_dwordx2 off, v[2:3], s6 -// GFX940: scratch_store_dwordx2 off, v[2:3], s6 ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0x06,0x00] +// GFX942: scratch_store_dwordx2 off, v[2:3], s6 ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0x06,0x00] scratch_store_dwordx2 off, v[2:3], s6 offset:16 -// GFX940: scratch_store_dwordx2 off, v[2:3], s6 offset:16 ; encoding: [0x10,0x40,0x74,0xdc,0x00,0x02,0x06,0x00] +// GFX942: scratch_store_dwordx2 off, v[2:3], s6 offset:16 ; encoding: [0x10,0x40,0x74,0xdc,0x00,0x02,0x06,0x00] scratch_store_dwordx2 off, v[2:3], off -// GFX940: scratch_store_dwordx2 off, v[2:3], off ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0x7f,0x00] +// GFX942: scratch_store_dwordx2 off, v[2:3], off ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0x7f,0x00] scratch_store_dwordx2 off, v[2:3], off offset:16 -// GFX940: scratch_store_dwordx2 off, v[2:3], off offset:16 ; encoding: [0x10,0x40,0x74,0xdc,0x00,0x02,0x7f,0x00] +// GFX942: scratch_store_dwordx2 off, v[2:3], off offset:16 ; encoding: [0x10,0x40,0x74,0xdc,0x00,0x02,0x7f,0x00] scratch_store_dwordx3 v4, a[2:4], s6 -// GFX940: scratch_store_dwordx3 v4, a[2:4], s6 ; encoding: [0x00,0x60,0x78,0xdc,0x04,0x02,0x86,0x00] +// GFX942: scratch_store_dwordx3 v4, a[2:4], s6 ; encoding: [0x00,0x60,0x78,0xdc,0x04,0x02,0x86,0x00] scratch_store_dwordx3 v4, a[2:4], s6 offset:16 -// GFX940: scratch_store_dwordx3 v4, a[2:4], s6 offset:16 ; encoding: [0x10,0x60,0x78,0xdc,0x04,0x02,0x86,0x00] +// GFX942: scratch_store_dwordx3 v4, a[2:4], s6 offset:16 ; encoding: [0x10,0x60,0x78,0xdc,0x04,0x02,0x86,0x00] scratch_store_dwordx3 v4, a[2:4], off -// GFX940: scratch_store_dwordx3 v4, a[2:4], off ; encoding: [0x00,0x60,0x78,0xdc,0x04,0x02,0xff,0x00] +// GFX942: scratch_store_dwordx3 v4, a[2:4], off ; encoding: [0x00,0x60,0x78,0xdc,0x04,0x02,0xff,0x00] scratch_store_dwordx3 v4, a[2:4], off offset:16 -// GFX940: scratch_store_dwordx3 v4, a[2:4], off offset:16 ; encoding: [0x10,0x60,0x78,0xdc,0x04,0x02,0xff,0x00] +// GFX942: scratch_store_dwordx3 v4, a[2:4], off offset:16 ; encoding: [0x10,0x60,0x78,0xdc,0x04,0x02,0xff,0x00] scratch_store_dwordx3 off, a[2:4], s6 -// GFX940: scratch_store_dwordx3 off, a[2:4], s6 ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0x86,0x00] +// GFX942: scratch_store_dwordx3 off, a[2:4], s6 ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0x86,0x00] scratch_store_dwordx3 off, a[2:4], s6 offset:16 -// GFX940: scratch_store_dwordx3 off, a[2:4], s6 offset:16 ; encoding: [0x10,0x40,0x78,0xdc,0x00,0x02,0x86,0x00] +// GFX942: scratch_store_dwordx3 off, a[2:4], s6 offset:16 ; encoding: [0x10,0x40,0x78,0xdc,0x00,0x02,0x86,0x00] scratch_store_dwordx3 off, a[2:4], off -// GFX940: scratch_store_dwordx3 off, a[2:4], off ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0xff,0x00] +// GFX942: scratch_store_dwordx3 off, a[2:4], off ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0xff,0x00] scratch_store_dwordx3 off, a[2:4], off offset:16 -// GFX940: scratch_store_dwordx3 off, a[2:4], off offset:16 ; encoding: [0x10,0x40,0x78,0xdc,0x00,0x02,0xff,0x00] +// GFX942: scratch_store_dwordx3 off, a[2:4], off offset:16 ; encoding: [0x10,0x40,0x78,0xdc,0x00,0x02,0xff,0x00] scratch_store_dwordx3 v4, v[2:4], s6 -// GFX940: scratch_store_dwordx3 v4, v[2:4], s6 ; encoding: [0x00,0x60,0x78,0xdc,0x04,0x02,0x06,0x00] +// GFX942: scratch_store_dwordx3 v4, v[2:4], s6 ; encoding: [0x00,0x60,0x78,0xdc,0x04,0x02,0x06,0x00] scratch_store_dwordx3 v4, v[2:4], s6 offset:16 -// GFX940: scratch_store_dwordx3 v4, v[2:4], s6 offset:16 ; encoding: [0x10,0x60,0x78,0xdc,0x04,0x02,0x06,0x00] +// GFX942: scratch_store_dwordx3 v4, v[2:4], s6 offset:16 ; encoding: [0x10,0x60,0x78,0xdc,0x04,0x02,0x06,0x00] scratch_store_dwordx3 v4, v[2:4], off -// GFX940: scratch_store_dwordx3 v4, v[2:4], off ; encoding: [0x00,0x60,0x78,0xdc,0x04,0x02,0x7f,0x00] +// GFX942: scratch_store_dwordx3 v4, v[2:4], off ; encoding: [0x00,0x60,0x78,0xdc,0x04,0x02,0x7f,0x00] scratch_store_dwordx3 v4, v[2:4], off offset:16 -// GFX940: scratch_store_dwordx3 v4, v[2:4], off offset:16 ; encoding: [0x10,0x60,0x78,0xdc,0x04,0x02,0x7f,0x00] +// GFX942: scratch_store_dwordx3 v4, v[2:4], off offset:16 ; encoding: [0x10,0x60,0x78,0xdc,0x04,0x02,0x7f,0x00] scratch_store_dwordx3 off, v[2:4], s6 -// GFX940: scratch_store_dwordx3 off, v[2:4], s6 ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0x06,0x00] +// GFX942: scratch_store_dwordx3 off, v[2:4], s6 ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0x06,0x00] scratch_store_dwordx3 off, v[2:4], s6 offset:16 -// GFX940: scratch_store_dwordx3 off, v[2:4], s6 offset:16 ; encoding: [0x10,0x40,0x78,0xdc,0x00,0x02,0x06,0x00] +// GFX942: scratch_store_dwordx3 off, v[2:4], s6 offset:16 ; encoding: [0x10,0x40,0x78,0xdc,0x00,0x02,0x06,0x00] scratch_store_dwordx3 off, v[2:4], off -// GFX940: scratch_store_dwordx3 off, v[2:4], off ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0x7f,0x00] +// GFX942: scratch_store_dwordx3 off, v[2:4], off ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0x7f,0x00] scratch_store_dwordx3 off, v[2:4], off offset:16 -// GFX940: scratch_store_dwordx3 off, v[2:4], off offset:16 ; encoding: [0x10,0x40,0x78,0xdc,0x00,0x02,0x7f,0x00] +// GFX942: scratch_store_dwordx3 off, v[2:4], off offset:16 ; encoding: [0x10,0x40,0x78,0xdc,0x00,0x02,0x7f,0x00] scratch_store_dwordx4 v4, a[2:5], s6 -// GFX940: scratch_store_dwordx4 v4, a[2:5], s6 ; encoding: [0x00,0x60,0x7c,0xdc,0x04,0x02,0x86,0x00] +// GFX942: scratch_store_dwordx4 v4, a[2:5], s6 ; encoding: [0x00,0x60,0x7c,0xdc,0x04,0x02,0x86,0x00] scratch_store_dwordx4 v4, a[2:5], s6 offset:16 -// GFX940: scratch_store_dwordx4 v4, a[2:5], s6 offset:16 ; encoding: [0x10,0x60,0x7c,0xdc,0x04,0x02,0x86,0x00] +// GFX942: scratch_store_dwordx4 v4, a[2:5], s6 offset:16 ; encoding: [0x10,0x60,0x7c,0xdc,0x04,0x02,0x86,0x00] scratch_store_dwordx4 v4, a[2:5], off -// GFX940: scratch_store_dwordx4 v4, a[2:5], off ; encoding: [0x00,0x60,0x7c,0xdc,0x04,0x02,0xff,0x00] +// GFX942: scratch_store_dwordx4 v4, a[2:5], off ; encoding: [0x00,0x60,0x7c,0xdc,0x04,0x02,0xff,0x00] scratch_store_dwordx4 v4, a[2:5], off offset:16 -// GFX940: scratch_store_dwordx4 v4, a[2:5], off offset:16 ; encoding: [0x10,0x60,0x7c,0xdc,0x04,0x02,0xff,0x00] +// GFX942: scratch_store_dwordx4 v4, a[2:5], off offset:16 ; encoding: [0x10,0x60,0x7c,0xdc,0x04,0x02,0xff,0x00] scratch_store_dwordx4 off, a[2:5], s6 -// GFX940: scratch_store_dwordx4 off, a[2:5], s6 ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x86,0x00] +// GFX942: scratch_store_dwordx4 off, a[2:5], s6 ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x86,0x00] scratch_store_dwordx4 off, a[2:5], s6 offset:16 -// GFX940: scratch_store_dwordx4 off, a[2:5], s6 offset:16 ; encoding: [0x10,0x40,0x7c,0xdc,0x00,0x02,0x86,0x00] +// GFX942: scratch_store_dwordx4 off, a[2:5], s6 offset:16 ; encoding: [0x10,0x40,0x7c,0xdc,0x00,0x02,0x86,0x00] scratch_store_dwordx4 off, a[2:5], off -// GFX940: scratch_store_dwordx4 off, a[2:5], off ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0xff,0x00] +// GFX942: scratch_store_dwordx4 off, a[2:5], off ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0xff,0x00] scratch_store_dwordx4 off, a[2:5], off offset:16 -// GFX940: scratch_store_dwordx4 off, a[2:5], off offset:16 ; encoding: [0x10,0x40,0x7c,0xdc,0x00,0x02,0xff,0x00] +// GFX942: scratch_store_dwordx4 off, a[2:5], off offset:16 ; encoding: [0x10,0x40,0x7c,0xdc,0x00,0x02,0xff,0x00] scratch_store_dwordx4 v4, v[2:5], s6 -// GFX940: scratch_store_dwordx4 v4, v[2:5], s6 ; encoding: [0x00,0x60,0x7c,0xdc,0x04,0x02,0x06,0x00] +// GFX942: scratch_store_dwordx4 v4, v[2:5], s6 ; encoding: [0x00,0x60,0x7c,0xdc,0x04,0x02,0x06,0x00] scratch_store_dwordx4 v4, v[2:5], s6 offset:16 -// GFX940: scratch_store_dwordx4 v4, v[2:5], s6 offset:16 ; encoding: [0x10,0x60,0x7c,0xdc,0x04,0x02,0x06,0x00] +// GFX942: scratch_store_dwordx4 v4, v[2:5], s6 offset:16 ; encoding: [0x10,0x60,0x7c,0xdc,0x04,0x02,0x06,0x00] scratch_store_dwordx4 v4, v[2:5], off -// GFX940: scratch_store_dwordx4 v4, v[2:5], off ; encoding: [0x00,0x60,0x7c,0xdc,0x04,0x02,0x7f,0x00] +// GFX942: scratch_store_dwordx4 v4, v[2:5], off ; encoding: [0x00,0x60,0x7c,0xdc,0x04,0x02,0x7f,0x00] scratch_store_dwordx4 v4, v[2:5], off offset:16 -// GFX940: scratch_store_dwordx4 v4, v[2:5], off offset:16 ; encoding: [0x10,0x60,0x7c,0xdc,0x04,0x02,0x7f,0x00] +// GFX942: scratch_store_dwordx4 v4, v[2:5], off offset:16 ; encoding: [0x10,0x60,0x7c,0xdc,0x04,0x02,0x7f,0x00] scratch_store_dwordx4 off, v[2:5], s6 -// GFX940: scratch_store_dwordx4 off, v[2:5], s6 ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x06,0x00] +// GFX942: scratch_store_dwordx4 off, v[2:5], s6 ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x06,0x00] scratch_store_dwordx4 off, v[2:5], s6 offset:16 -// GFX940: scratch_store_dwordx4 off, v[2:5], s6 offset:16 ; encoding: [0x10,0x40,0x7c,0xdc,0x00,0x02,0x06,0x00] +// GFX942: scratch_store_dwordx4 off, v[2:5], s6 offset:16 ; encoding: [0x10,0x40,0x7c,0xdc,0x00,0x02,0x06,0x00] scratch_store_dwordx4 off, v[2:5], off -// GFX940: scratch_store_dwordx4 off, v[2:5], off ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x7f,0x00] +// GFX942: scratch_store_dwordx4 off, v[2:5], off ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x7f,0x00] scratch_store_dwordx4 off, v[2:5], off offset:16 -// GFX940: scratch_store_dwordx4 off, v[2:5], off offset:16 ; encoding: [0x10,0x40,0x7c,0xdc,0x00,0x02,0x7f,0x00] +// GFX942: scratch_store_dwordx4 off, v[2:5], off offset:16 ; encoding: [0x10,0x40,0x7c,0xdc,0x00,0x02,0x7f,0x00] scratch_store_short v4, a2, s6 -// GFX940: scratch_store_short v4, a2, s6 ; encoding: [0x00,0x60,0x68,0xdc,0x04,0x02,0x86,0x00] +// GFX942: scratch_store_short v4, a2, s6 ; encoding: [0x00,0x60,0x68,0xdc,0x04,0x02,0x86,0x00] scratch_store_short v4, a2, s6 offset:16 -// GFX940: scratch_store_short v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x68,0xdc,0x04,0x02,0x86,0x00] +// GFX942: scratch_store_short v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x68,0xdc,0x04,0x02,0x86,0x00] scratch_store_short v4, a2, off -// GFX940: scratch_store_short v4, a2, off ; encoding: [0x00,0x60,0x68,0xdc,0x04,0x02,0xff,0x00] +// GFX942: scratch_store_short v4, a2, off ; encoding: [0x00,0x60,0x68,0xdc,0x04,0x02,0xff,0x00] scratch_store_short v4, a2, off offset:16 -// GFX940: scratch_store_short v4, a2, off offset:16 ; encoding: [0x10,0x60,0x68,0xdc,0x04,0x02,0xff,0x00] +// GFX942: scratch_store_short v4, a2, off offset:16 ; encoding: [0x10,0x60,0x68,0xdc,0x04,0x02,0xff,0x00] scratch_store_short off, a2, s6 -// GFX940: scratch_store_short off, a2, s6 ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0x86,0x00] +// GFX942: scratch_store_short off, a2, s6 ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0x86,0x00] scratch_store_short off, a2, s6 offset:16 -// GFX940: scratch_store_short off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x68,0xdc,0x00,0x02,0x86,0x00] +// GFX942: scratch_store_short off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x68,0xdc,0x00,0x02,0x86,0x00] scratch_store_short off, a2, off -// GFX940: scratch_store_short off, a2, off ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0xff,0x00] +// GFX942: scratch_store_short off, a2, off ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0xff,0x00] scratch_store_short off, a2, off offset:16 -// GFX940: scratch_store_short off, a2, off offset:16 ; encoding: [0x10,0x40,0x68,0xdc,0x00,0x02,0xff,0x00] +// GFX942: scratch_store_short off, a2, off offset:16 ; encoding: [0x10,0x40,0x68,0xdc,0x00,0x02,0xff,0x00] scratch_store_short v4, v2, s6 -// GFX940: scratch_store_short v4, v2, s6 ; encoding: [0x00,0x60,0x68,0xdc,0x04,0x02,0x06,0x00] +// GFX942: scratch_store_short v4, v2, s6 ; encoding: [0x00,0x60,0x68,0xdc,0x04,0x02,0x06,0x00] scratch_store_short v4, v2, s6 offset:16 -// GFX940: scratch_store_short v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x68,0xdc,0x04,0x02,0x06,0x00] +// GFX942: scratch_store_short v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x68,0xdc,0x04,0x02,0x06,0x00] scratch_store_short v4, v2, off -// GFX940: scratch_store_short v4, v2, off ; encoding: [0x00,0x60,0x68,0xdc,0x04,0x02,0x7f,0x00] +// GFX942: scratch_store_short v4, v2, off ; encoding: [0x00,0x60,0x68,0xdc,0x04,0x02,0x7f,0x00] scratch_store_short v4, v2, off offset:16 -// GFX940: scratch_store_short v4, v2, off offset:16 ; encoding: [0x10,0x60,0x68,0xdc,0x04,0x02,0x7f,0x00] +// GFX942: scratch_store_short v4, v2, off offset:16 ; encoding: [0x10,0x60,0x68,0xdc,0x04,0x02,0x7f,0x00] scratch_store_short off, v2, s6 -// GFX940: scratch_store_short off, v2, s6 ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0x06,0x00] +// GFX942: scratch_store_short off, v2, s6 ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0x06,0x00] scratch_store_short off, v2, s6 offset:16 -// GFX940: scratch_store_short off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x68,0xdc,0x00,0x02,0x06,0x00] +// GFX942: scratch_store_short off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x68,0xdc,0x00,0x02,0x06,0x00] scratch_store_short off, v2, off -// GFX940: scratch_store_short off, v2, off ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0x7f,0x00] +// GFX942: scratch_store_short off, v2, off ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0x7f,0x00] scratch_store_short off, v2, off offset:16 -// GFX940: scratch_store_short off, v2, off offset:16 ; encoding: [0x10,0x40,0x68,0xdc,0x00,0x02,0x7f,0x00] +// GFX942: scratch_store_short off, v2, off offset:16 ; encoding: [0x10,0x40,0x68,0xdc,0x00,0x02,0x7f,0x00] scratch_store_short_d16_hi v4, a2, s6 -// GFX940: scratch_store_short_d16_hi v4, a2, s6 ; encoding: [0x00,0x60,0x6c,0xdc,0x04,0x02,0x86,0x00] +// GFX942: scratch_store_short_d16_hi v4, a2, s6 ; encoding: [0x00,0x60,0x6c,0xdc,0x04,0x02,0x86,0x00] scratch_store_short_d16_hi v4, a2, s6 offset:16 -// GFX940: scratch_store_short_d16_hi v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x6c,0xdc,0x04,0x02,0x86,0x00] +// GFX942: scratch_store_short_d16_hi v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x6c,0xdc,0x04,0x02,0x86,0x00] scratch_store_short_d16_hi v4, a2, off -// GFX940: scratch_store_short_d16_hi v4, a2, off ; encoding: [0x00,0x60,0x6c,0xdc,0x04,0x02,0xff,0x00] +// GFX942: scratch_store_short_d16_hi v4, a2, off ; encoding: [0x00,0x60,0x6c,0xdc,0x04,0x02,0xff,0x00] scratch_store_short_d16_hi v4, a2, off offset:16 -// GFX940: scratch_store_short_d16_hi v4, a2, off offset:16 ; encoding: [0x10,0x60,0x6c,0xdc,0x04,0x02,0xff,0x00] +// GFX942: scratch_store_short_d16_hi v4, a2, off offset:16 ; encoding: [0x10,0x60,0x6c,0xdc,0x04,0x02,0xff,0x00] scratch_store_short_d16_hi off, a2, s6 -// GFX940: scratch_store_short_d16_hi off, a2, s6 ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x86,0x00] +// GFX942: scratch_store_short_d16_hi off, a2, s6 ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x86,0x00] scratch_store_short_d16_hi off, a2, s6 offset:16 -// GFX940: scratch_store_short_d16_hi off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x6c,0xdc,0x00,0x02,0x86,0x00] +// GFX942: scratch_store_short_d16_hi off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x6c,0xdc,0x00,0x02,0x86,0x00] scratch_store_short_d16_hi off, a2, off -// GFX940: scratch_store_short_d16_hi off, a2, off ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0xff,0x00] +// GFX942: scratch_store_short_d16_hi off, a2, off ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0xff,0x00] scratch_store_short_d16_hi off, a2, off offset:16 -// GFX940: scratch_store_short_d16_hi off, a2, off offset:16 ; encoding: [0x10,0x40,0x6c,0xdc,0x00,0x02,0xff,0x00] +// GFX942: scratch_store_short_d16_hi off, a2, off offset:16 ; encoding: [0x10,0x40,0x6c,0xdc,0x00,0x02,0xff,0x00] scratch_store_short_d16_hi v4, v2, s6 -// GFX940: scratch_store_short_d16_hi v4, v2, s6 ; encoding: [0x00,0x60,0x6c,0xdc,0x04,0x02,0x06,0x00] +// GFX942: scratch_store_short_d16_hi v4, v2, s6 ; encoding: [0x00,0x60,0x6c,0xdc,0x04,0x02,0x06,0x00] scratch_store_short_d16_hi v4, v2, s6 offset:16 -// GFX940: scratch_store_short_d16_hi v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x6c,0xdc,0x04,0x02,0x06,0x00] +// GFX942: scratch_store_short_d16_hi v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x6c,0xdc,0x04,0x02,0x06,0x00] scratch_store_short_d16_hi v4, v2, off -// GFX940: scratch_store_short_d16_hi v4, v2, off ; encoding: [0x00,0x60,0x6c,0xdc,0x04,0x02,0x7f,0x00] +// GFX942: scratch_store_short_d16_hi v4, v2, off ; encoding: [0x00,0x60,0x6c,0xdc,0x04,0x02,0x7f,0x00] scratch_store_short_d16_hi v4, v2, off offset:16 -// GFX940: scratch_store_short_d16_hi v4, v2, off offset:16 ; encoding: [0x10,0x60,0x6c,0xdc,0x04,0x02,0x7f,0x00] +// GFX942: scratch_store_short_d16_hi v4, v2, off offset:16 ; encoding: [0x10,0x60,0x6c,0xdc,0x04,0x02,0x7f,0x00] scratch_store_short_d16_hi off, v2, s6 -// GFX940: scratch_store_short_d16_hi off, v2, s6 ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x06,0x00] +// GFX942: scratch_store_short_d16_hi off, v2, s6 ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x06,0x00] scratch_store_short_d16_hi off, v2, s6 offset:16 -// GFX940: scratch_store_short_d16_hi off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x6c,0xdc,0x00,0x02,0x06,0x00] +// GFX942: scratch_store_short_d16_hi off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x6c,0xdc,0x00,0x02,0x06,0x00] scratch_store_short_d16_hi off, v2, off -// GFX940: scratch_store_short_d16_hi off, v2, off ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x7f,0x00] +// GFX942: scratch_store_short_d16_hi off, v2, off ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x7f,0x00] scratch_store_short_d16_hi off, v2, off offset:16 -// GFX940: scratch_store_short_d16_hi off, v2, off offset:16 ; encoding: [0x10,0x40,0x6c,0xdc,0x00,0x02,0x7f,0x00] +// GFX942: scratch_store_short_d16_hi off, v2, off offset:16 ; encoding: [0x10,0x40,0x6c,0xdc,0x00,0x02,0x7f,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx940_err.s b/llvm/test/MC/AMDGPU/gfx940_err.s deleted file mode 100644 index 000f3decf9607..0000000000000 --- a/llvm/test/MC/AMDGPU/gfx940_err.s +++ /dev/null @@ -1,127 +0,0 @@ -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefix=GFX940 --implicit-check-not=error: %s - -v_mac_f32 v0, v1, v2 -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU - -v_mac_f32_e64 v5, v1, v2 -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU - -v_mac_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU - -v_mac_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU - -v_mac_f32_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU - -v_mad_f32 v0, v1, v2, v3 -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU - -v_madak_f32 v0, v1, v2, 0 -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU - -v_madmk_f32 v0, v1, 0, v2 -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU - -v_mad_legacy_f32 v0, v1, v2, v3 -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU - -v_mov_b64 v[2:3], v[4:5] row_shl:1 -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_newbcast - -v_mov_b64 v[2:3], -v[4:5] -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. - -v_mov_b64 v[2:3], |v[4:5]| -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. - -v_mov_b64 v[2:3], v[4:5] dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. - -v_mov_b64_sdwa v[2:3], v[4:5] -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: sdwa variant of this instruction is not supported - -buffer_invl2 -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU - -global_load_dword v2, v[2:3], off glc -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -global_load_dword v2, v[2:3], off slc -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -global_load_dword v2, v[2:3], off scc -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -s_load_dword s2, s[2:3], 0x0 sc0 -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -buffer_atomic_swap v5, off, s[8:11], s3 glc -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -buffer_atomic_swap v5, off, s[8:11], s3 slc -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -buffer_wbl2 glc -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -buffer_wbl2 scc -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_dot2_u32_u16 v0, 1, v0, s2 op_sel:[0,1,0,1] op_sel_hi:[0,0,1,1] -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: invalid op_sel operand - -v_cvt_f32_fp8 v1, sext(v3) src0_sel:BYTE_1 -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. - -v_cvt_pk_f32_bf8 v[2:3], sext(v3) src0_sel:BYTE_1 -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. - -v_cvt_sr_bf8_f32 v1, v2, -v3 -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. - -v_cvt_sr_fp8_f32 v1, v2, -v3 -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. - -v_cvt_sr_fp8_f32 v1, v2, v3 clamp -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cvt_sr_fp8_f32 v1, v2, v3 mul:2 -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. - -v_cvt_pk_fp8_f32 v1, v2, v3 clamp -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cvt_pk_fp8_f32 v1, v2, v3 mul:2 -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. - -s_getreg_b32 s1, hwreg(HW_REG_FLAT_SCR_LO) -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: invalid hardware register: not supported on this GPU - -s_getreg_b32 s1, hwreg(HW_REG_FLAT_SCR_HI) -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: invalid hardware register: not supported on this GPU - -s_getreg_b32 s1, hwreg(HW_REG_XNACK_MASK) -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: invalid hardware register: not supported on this GPU - -s_getreg_b32 s1, hwreg(HW_REG_HW_ID1) -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: invalid hardware register: not supported on this GPU - -s_getreg_b32 s1, hwreg(HW_REG_HW_ID2) -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: invalid hardware register: not supported on this GPU - -s_getreg_b32 s1, hwreg(HW_REG_POPS_PACKER) -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: invalid hardware register: not supported on this GPU - -ds_ordered_count v5, v1 offset:65535 gds -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU - -exp pos0 v3, v2, v1, v0 -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU - -global_load_dword v[2:3], off lds -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -scratch_load_dword v2, off lds -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/gfx940_asm_features.s b/llvm/test/MC/AMDGPU/gfx942_asm_features.s similarity index 53% rename from llvm/test/MC/AMDGPU/gfx940_asm_features.s rename to llvm/test/MC/AMDGPU/gfx942_asm_features.s index e2e84f27b828a..f3086bad33402 100644 --- a/llvm/test/MC/AMDGPU/gfx940_asm_features.s +++ b/llvm/test/MC/AMDGPU/gfx942_asm_features.s @@ -1,776 +1,776 @@ -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx940 -show-encoding %s | FileCheck --check-prefix=GFX940 --strict-whitespace %s -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefix=GFX940 --strict-whitespace %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx90a %s 2>&1 | FileCheck --check-prefixes=NOT-GFX940,GFX90A --implicit-check-not=error: %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=NOT-GFX940,GFX10 --implicit-check-not=error: %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx942 -show-encoding %s | FileCheck --check-prefix=GFX942 --strict-whitespace %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefix=GFX942 --strict-whitespace %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx90a %s 2>&1 | FileCheck --check-prefixes=NOT-GFX942,GFX90A --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=NOT-GFX942,GFX10 --implicit-check-not=error: %s -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction -// GFX940: global_load_dword v2, v[2:3], off sc0 ; encoding: [0x00,0x80,0x51,0xdc,0x02,0x00,0x7f,0x02] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX942: global_load_dword v2, v[2:3], off sc0 ; encoding: [0x00,0x80,0x51,0xdc,0x02,0x00,0x7f,0x02] global_load_dword v2, v[2:3], off sc0 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction -// GFX940: global_load_dword v2, v[2:3], off ; encoding: [0x00,0x80,0x50,0xdc,0x02,0x00,0x7f,0x02] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX942: global_load_dword v2, v[2:3], off ; encoding: [0x00,0x80,0x50,0xdc,0x02,0x00,0x7f,0x02] global_load_dword v2, v[2:3], off nosc0 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction -// GFX940: global_load_dword v2, v[2:3], off sc1 ; encoding: [0x00,0x80,0x50,0xde,0x02,0x00,0x7f,0x02] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX942: global_load_dword v2, v[2:3], off sc1 ; encoding: [0x00,0x80,0x50,0xde,0x02,0x00,0x7f,0x02] global_load_dword v2, v[2:3], off sc1 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction -// GFX940: global_load_dword v2, v[2:3], off ; encoding: [0x00,0x80,0x50,0xdc,0x02,0x00,0x7f,0x02] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX942: global_load_dword v2, v[2:3], off ; encoding: [0x00,0x80,0x50,0xdc,0x02,0x00,0x7f,0x02] global_load_dword v2, v[2:3], off nosc1 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction -// GFX940: global_load_dword v2, v[2:3], off nt ; encoding: [0x00,0x80,0x52,0xdc,0x02,0x00,0x7f,0x02] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX942: global_load_dword v2, v[2:3], off nt ; encoding: [0x00,0x80,0x52,0xdc,0x02,0x00,0x7f,0x02] global_load_dword v2, v[2:3], off nt -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction -// GFX940: global_load_dword v2, v[2:3], off ; encoding: [0x00,0x80,0x50,0xdc,0x02,0x00,0x7f,0x02] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX942: global_load_dword v2, v[2:3], off ; encoding: [0x00,0x80,0x50,0xdc,0x02,0x00,0x7f,0x02] global_load_dword v2, v[2:3], off nont -// GFX940: s_load_dword s2, s[2:3], 0x0 glc ; encoding: [0x81,0x00,0x03,0xc0,0x00,0x00,0x00,0x00] +// GFX942: s_load_dword s2, s[2:3], 0x0 glc ; encoding: [0x81,0x00,0x03,0xc0,0x00,0x00,0x00,0x00] s_load_dword s2, s[2:3], 0x0 glc -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction -// GFX940: buffer_load_dword v5, off, s[8:11], s3 sc0 nt sc1 ; encoding: [0x00,0xc0,0x52,0xe0,0x00,0x05,0x02,0x03] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX942: buffer_load_dword v5, off, s[8:11], s3 sc0 nt sc1 ; encoding: [0x00,0xc0,0x52,0xe0,0x00,0x05,0x02,0x03] buffer_load_dword v5, off, s[8:11], s3 sc0 nt sc1 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: flat_atomic_add_f32 v[2:3], v1 ; encoding: [0x00,0x00,0x34,0xdd,0x02,0x01,0x00,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: flat_atomic_add_f32 v[2:3], v1 ; encoding: [0x00,0x00,0x34,0xdd,0x02,0x01,0x00,0x00] flat_atomic_add_f32 v[2:3], v1 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: flat_atomic_add_f32 v[2:3], a1 ; encoding: [0x00,0x00,0x34,0xdd,0x02,0x01,0x80,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: flat_atomic_add_f32 v[2:3], a1 ; encoding: [0x00,0x00,0x34,0xdd,0x02,0x01,0x80,0x00] flat_atomic_add_f32 v[2:3], a1 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: flat_atomic_add_f32 v4, v[2:3], v1 sc0 ; encoding: [0x00,0x00,0x35,0xdd,0x02,0x01,0x00,0x04] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: flat_atomic_add_f32 v4, v[2:3], v1 sc0 ; encoding: [0x00,0x00,0x35,0xdd,0x02,0x01,0x00,0x04] flat_atomic_add_f32 v4, v[2:3], v1 sc0 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: flat_atomic_add_f32 a4, v[2:3], a1 sc0 ; encoding: [0x00,0x00,0x35,0xdd,0x02,0x01,0x80,0x04] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: flat_atomic_add_f32 a4, v[2:3], a1 sc0 ; encoding: [0x00,0x00,0x35,0xdd,0x02,0x01,0x80,0x04] flat_atomic_add_f32 a4, v[2:3], a1 sc0 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: flat_atomic_pk_add_f16 v4, v[2:3], v1 sc0 ; encoding: [0x00,0x00,0x39,0xdd,0x02,0x01,0x00,0x04] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: flat_atomic_pk_add_f16 v4, v[2:3], v1 sc0 ; encoding: [0x00,0x00,0x39,0xdd,0x02,0x01,0x00,0x04] flat_atomic_pk_add_f16 v4, v[2:3], v1 sc0 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: flat_atomic_pk_add_f16 a4, v[2:3], a1 sc0 ; encoding: [0x00,0x00,0x39,0xdd,0x02,0x01,0x80,0x04] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: flat_atomic_pk_add_f16 a4, v[2:3], a1 sc0 ; encoding: [0x00,0x00,0x39,0xdd,0x02,0x01,0x80,0x04] flat_atomic_pk_add_f16 a4, v[2:3], a1 sc0 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: flat_atomic_pk_add_f16 v[2:3], v1 ; encoding: [0x00,0x00,0x38,0xdd,0x02,0x01,0x00,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: flat_atomic_pk_add_f16 v[2:3], v1 ; encoding: [0x00,0x00,0x38,0xdd,0x02,0x01,0x00,0x00] flat_atomic_pk_add_f16 v[2:3], v1 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: flat_atomic_pk_add_f16 v[2:3], a1 ; encoding: [0x00,0x00,0x38,0xdd,0x02,0x01,0x80,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: flat_atomic_pk_add_f16 v[2:3], a1 ; encoding: [0x00,0x00,0x38,0xdd,0x02,0x01,0x80,0x00] flat_atomic_pk_add_f16 v[2:3], a1 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: flat_atomic_pk_add_bf16 v4, v[2:3], v1 sc0 ; encoding: [0x00,0x00,0x49,0xdd,0x02,0x01,0x00,0x04] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: flat_atomic_pk_add_bf16 v4, v[2:3], v1 sc0 ; encoding: [0x00,0x00,0x49,0xdd,0x02,0x01,0x00,0x04] flat_atomic_pk_add_bf16 v4, v[2:3], v1 sc0 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: flat_atomic_pk_add_bf16 a4, v[2:3], a1 sc0 ; encoding: [0x00,0x00,0x49,0xdd,0x02,0x01,0x80,0x04] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: flat_atomic_pk_add_bf16 a4, v[2:3], a1 sc0 ; encoding: [0x00,0x00,0x49,0xdd,0x02,0x01,0x80,0x04] flat_atomic_pk_add_bf16 a4, v[2:3], a1 sc0 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: flat_atomic_pk_add_bf16 v[2:3], v1 ; encoding: [0x00,0x00,0x48,0xdd,0x02,0x01,0x00,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: flat_atomic_pk_add_bf16 v[2:3], v1 ; encoding: [0x00,0x00,0x48,0xdd,0x02,0x01,0x00,0x00] flat_atomic_pk_add_bf16 v[2:3], v1 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: flat_atomic_pk_add_bf16 v[2:3], a1 ; encoding: [0x00,0x00,0x48,0xdd,0x02,0x01,0x80,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: flat_atomic_pk_add_bf16 v[2:3], a1 ; encoding: [0x00,0x00,0x48,0xdd,0x02,0x01,0x80,0x00] flat_atomic_pk_add_bf16 v[2:3], a1 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: global_atomic_pk_add_bf16 v4, v[2:3], v1, off sc0 ; encoding: [0x00,0x80,0x49,0xdd,0x02,0x01,0x7f,0x04] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: global_atomic_pk_add_bf16 v4, v[2:3], v1, off sc0 ; encoding: [0x00,0x80,0x49,0xdd,0x02,0x01,0x7f,0x04] global_atomic_pk_add_bf16 v4, v[2:3], v1, off sc0 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: global_atomic_pk_add_bf16 a4, v[2:3], a1, off sc0 ; encoding: [0x00,0x80,0x49,0xdd,0x02,0x01,0xff,0x04] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: global_atomic_pk_add_bf16 a4, v[2:3], a1, off sc0 ; encoding: [0x00,0x80,0x49,0xdd,0x02,0x01,0xff,0x04] global_atomic_pk_add_bf16 a4, v[2:3], a1, off sc0 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: global_atomic_pk_add_bf16 v[2:3], v1, off ; encoding: [0x00,0x80,0x48,0xdd,0x02,0x01,0x7f,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: global_atomic_pk_add_bf16 v[2:3], v1, off ; encoding: [0x00,0x80,0x48,0xdd,0x02,0x01,0x7f,0x00] global_atomic_pk_add_bf16 v[2:3], v1, off -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: global_atomic_pk_add_bf16 v[2:3], a1, off ; encoding: [0x00,0x80,0x48,0xdd,0x02,0x01,0xff,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: global_atomic_pk_add_bf16 v[2:3], a1, off ; encoding: [0x00,0x80,0x48,0xdd,0x02,0x01,0xff,0x00] global_atomic_pk_add_bf16 v[2:3], a1, off -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: ds_pk_add_f16 v2, v1 ; encoding: [0x00,0x00,0x2e,0xd8,0x02,0x01,0x00,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: ds_pk_add_f16 v2, v1 ; encoding: [0x00,0x00,0x2e,0xd8,0x02,0x01,0x00,0x00] ds_pk_add_f16 v2, v1 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: ds_pk_add_f16 v2, a1 ; encoding: [0x00,0x00,0x2e,0xda,0x02,0x01,0x00,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: ds_pk_add_f16 v2, a1 ; encoding: [0x00,0x00,0x2e,0xda,0x02,0x01,0x00,0x00] ds_pk_add_f16 v2, a1 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: ds_pk_add_rtn_f16 v3, v2, v1 ; encoding: [0x00,0x00,0x6e,0xd9,0x02,0x01,0x00,0x03] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: ds_pk_add_rtn_f16 v3, v2, v1 ; encoding: [0x00,0x00,0x6e,0xd9,0x02,0x01,0x00,0x03] ds_pk_add_rtn_f16 v3, v2, v1 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: ds_pk_add_rtn_f16 a3, v2, a1 ; encoding: [0x00,0x00,0x6e,0xdb,0x02,0x01,0x00,0x03] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: ds_pk_add_rtn_f16 a3, v2, a1 ; encoding: [0x00,0x00,0x6e,0xdb,0x02,0x01,0x00,0x03] ds_pk_add_rtn_f16 a3, v2, a1 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: ds_pk_add_bf16 v2, v1 ; encoding: [0x00,0x00,0x30,0xd8,0x02,0x01,0x00,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: ds_pk_add_bf16 v2, v1 ; encoding: [0x00,0x00,0x30,0xd8,0x02,0x01,0x00,0x00] ds_pk_add_bf16 v2, v1 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: ds_pk_add_bf16 v2, a1 ; encoding: [0x00,0x00,0x30,0xda,0x02,0x01,0x00,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: ds_pk_add_bf16 v2, a1 ; encoding: [0x00,0x00,0x30,0xda,0x02,0x01,0x00,0x00] ds_pk_add_bf16 v2, a1 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: ds_pk_add_rtn_bf16 v3, v2, v1 ; encoding: [0x00,0x00,0x70,0xd9,0x02,0x01,0x00,0x03] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: ds_pk_add_rtn_bf16 v3, v2, v1 ; encoding: [0x00,0x00,0x70,0xd9,0x02,0x01,0x00,0x03] ds_pk_add_rtn_bf16 v3, v2, v1 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: ds_pk_add_rtn_bf16 a3, v2, a1 ; encoding: [0x00,0x00,0x70,0xdb,0x02,0x01,0x00,0x03] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: ds_pk_add_rtn_bf16 a3, v2, a1 ; encoding: [0x00,0x00,0x70,0xdb,0x02,0x01,0x00,0x03] ds_pk_add_rtn_bf16 a3, v2, a1 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: global_load_lds_dword v[2:3], off ; encoding: [0x00,0x80,0xa8,0xdc,0x02,0x00,0x7f,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: global_load_lds_dword v[2:3], off ; encoding: [0x00,0x80,0xa8,0xdc,0x02,0x00,0x7f,0x00] global_load_lds_dword v[2:3], off -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: -// GFX940: global_load_lds_dword v[2:3], off sc0 nt sc1 ; encoding: [0x00,0x80,0xab,0xde,0x02,0x00,0x7f,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX942: global_load_lds_dword v[2:3], off sc0 nt sc1 ; encoding: [0x00,0x80,0xab,0xde,0x02,0x00,0x7f,0x00] global_load_lds_dword v[2:3], off sc0 nt sc1 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: -// GFX940: global_load_lds_dword v[2:3], off offset:4 ; encoding: [0x04,0x80,0xa8,0xdc,0x02,0x00,0x7f,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX942: global_load_lds_dword v[2:3], off offset:4 ; encoding: [0x04,0x80,0xa8,0xdc,0x02,0x00,0x7f,0x00] global_load_lds_dword v[2:3], off offset:4 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: -// GFX940: global_load_lds_dword v2, s[4:5] offset:4 ; encoding: [0x04,0x80,0xa8,0xdc,0x02,0x00,0x04,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX942: global_load_lds_dword v2, s[4:5] offset:4 ; encoding: [0x04,0x80,0xa8,0xdc,0x02,0x00,0x04,0x00] global_load_lds_dword v2, s[4:5] offset:4 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: global_load_lds_ubyte v[2:3], off ; encoding: [0x00,0x80,0x98,0xdc,0x02,0x00,0x7f,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: global_load_lds_ubyte v[2:3], off ; encoding: [0x00,0x80,0x98,0xdc,0x02,0x00,0x7f,0x00] global_load_lds_ubyte v[2:3], off -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: global_load_lds_sbyte v[2:3], off ; encoding: [0x00,0x80,0x9c,0xdc,0x02,0x00,0x7f,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: global_load_lds_sbyte v[2:3], off ; encoding: [0x00,0x80,0x9c,0xdc,0x02,0x00,0x7f,0x00] global_load_lds_sbyte v[2:3], off -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: global_load_lds_sshort v[2:3], off ; encoding: [0x00,0x80,0xa4,0xdc,0x02,0x00,0x7f,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: global_load_lds_sshort v[2:3], off ; encoding: [0x00,0x80,0xa4,0xdc,0x02,0x00,0x7f,0x00] global_load_lds_sshort v[2:3], off -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: global_load_lds_ushort v[2:3], off ; encoding: [0x00,0x80,0xa0,0xdc,0x02,0x00,0x7f,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: global_load_lds_ushort v[2:3], off ; encoding: [0x00,0x80,0xa0,0xdc,0x02,0x00,0x7f,0x00] global_load_lds_ushort v[2:3], off -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: scratch_load_lds_dword v2, off ; encoding: [0x00,0x60,0xa8,0xdc,0x02,0x00,0x7f,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: scratch_load_lds_dword v2, off ; encoding: [0x00,0x60,0xa8,0xdc,0x02,0x00,0x7f,0x00] scratch_load_lds_dword v2, off -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: scratch_load_lds_dword v2, s4 ; encoding: [0x00,0x60,0xa8,0xdc,0x02,0x00,0x04,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: scratch_load_lds_dword v2, s4 ; encoding: [0x00,0x60,0xa8,0xdc,0x02,0x00,0x04,0x00] scratch_load_lds_dword v2, s4 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: -// GFX940: scratch_load_lds_dword v2, s4 offset:4 ; encoding: [0x04,0x60,0xa8,0xdc,0x02,0x00,0x04,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX942: scratch_load_lds_dword v2, s4 offset:4 ; encoding: [0x04,0x60,0xa8,0xdc,0x02,0x00,0x04,0x00] scratch_load_lds_dword v2, s4 offset:4 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: -// GFX940: scratch_load_lds_dword off, s4 offset:4 ; encoding: [0x04,0x40,0xa8,0xdc,0x00,0x00,0x04,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX942: scratch_load_lds_dword off, s4 offset:4 ; encoding: [0x04,0x40,0xa8,0xdc,0x00,0x00,0x04,0x00] scratch_load_lds_dword off, s4 offset:4 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: -// GFX940: scratch_load_lds_dword off, off offset:4 ; encoding: [0x04,0x40,0xa8,0xdc,0x00,0x00,0x7f,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX942: scratch_load_lds_dword off, off offset:4 ; encoding: [0x04,0x40,0xa8,0xdc,0x00,0x00,0x7f,0x00] scratch_load_lds_dword off, off offset:4 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: scratch_load_lds_ubyte v2, off ; encoding: [0x00,0x60,0x98,0xdc,0x02,0x00,0x7f,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: scratch_load_lds_ubyte v2, off ; encoding: [0x00,0x60,0x98,0xdc,0x02,0x00,0x7f,0x00] scratch_load_lds_ubyte v2, off -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: scratch_load_lds_sbyte v2, off ; encoding: [0x00,0x60,0x9c,0xdc,0x02,0x00,0x7f,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: scratch_load_lds_sbyte v2, off ; encoding: [0x00,0x60,0x9c,0xdc,0x02,0x00,0x7f,0x00] scratch_load_lds_sbyte v2, off -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: scratch_load_lds_ushort v2, off ; encoding: [0x00,0x60,0xa0,0xdc,0x02,0x00,0x7f,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: scratch_load_lds_ushort v2, off ; encoding: [0x00,0x60,0xa0,0xdc,0x02,0x00,0x7f,0x00] scratch_load_lds_ushort v2, off -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: scratch_load_lds_sshort v2, off ; encoding: [0x00,0x60,0xa4,0xdc,0x02,0x00,0x7f,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: scratch_load_lds_sshort v2, off ; encoding: [0x00,0x60,0xa4,0xdc,0x02,0x00,0x7f,0x00] scratch_load_lds_sshort v2, off -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: invalid hardware register: not supported on this GPU -// GFX940: s_getreg_b32 s1, hwreg(HW_REG_XCC_ID) ; encoding: [0x14,0xf8,0x81,0xb8] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: invalid hardware register: not supported on this GPU +// GFX942: s_getreg_b32 s1, hwreg(HW_REG_XCC_ID) ; encoding: [0x14,0xf8,0x81,0xb8] s_getreg_b32 s1, hwreg(HW_REG_XCC_ID) -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: invalid hardware register: not supported on this GPU -// GFX940: s_getreg_b32 s1, hwreg(HW_REG_SQ_PERF_SNAPSHOT_DATA) ; encoding: [0x15,0xf8,0x81,0xb8] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: invalid hardware register: not supported on this GPU +// GFX942: s_getreg_b32 s1, hwreg(HW_REG_SQ_PERF_SNAPSHOT_DATA) ; encoding: [0x15,0xf8,0x81,0xb8] s_getreg_b32 s1, hwreg(HW_REG_SQ_PERF_SNAPSHOT_DATA) -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: invalid hardware register: not supported on this GPU -// GFX940: s_getreg_b32 s1, hwreg(HW_REG_SQ_PERF_SNAPSHOT_DATA1) ; encoding: [0x16,0xf8,0x81,0xb8] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: invalid hardware register: not supported on this GPU +// GFX942: s_getreg_b32 s1, hwreg(HW_REG_SQ_PERF_SNAPSHOT_DATA1) ; encoding: [0x16,0xf8,0x81,0xb8] s_getreg_b32 s1, hwreg(HW_REG_SQ_PERF_SNAPSHOT_DATA1) -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: invalid hardware register: not supported on this GPU -// GFX940: s_getreg_b32 s1, hwreg(HW_REG_SQ_PERF_SNAPSHOT_PC_LO) ; encoding: [0x17,0xf8,0x81,0xb8] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: invalid hardware register: not supported on this GPU +// GFX942: s_getreg_b32 s1, hwreg(HW_REG_SQ_PERF_SNAPSHOT_PC_LO) ; encoding: [0x17,0xf8,0x81,0xb8] s_getreg_b32 s1, hwreg(HW_REG_SQ_PERF_SNAPSHOT_PC_LO) -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: invalid hardware register: not supported on this GPU -// GFX940: s_getreg_b32 s1, hwreg(HW_REG_SQ_PERF_SNAPSHOT_PC_HI) ; encoding: [0x18,0xf8,0x81,0xb8] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: invalid hardware register: not supported on this GPU +// GFX942: s_getreg_b32 s1, hwreg(HW_REG_SQ_PERF_SNAPSHOT_PC_HI) ; encoding: [0x18,0xf8,0x81,0xb8] s_getreg_b32 s1, hwreg(HW_REG_SQ_PERF_SNAPSHOT_PC_HI) -// GFX940: s_getreg_b32 s1, hwreg(HW_REG_TMA_HI) ; encoding: [0x13,0xf8,0x81,0xb8] +// GFX942: s_getreg_b32 s1, hwreg(HW_REG_TMA_HI) ; encoding: [0x13,0xf8,0x81,0xb8] s_getreg_b32 s1, hwreg(19) -// GFX940: s_getreg_b32 s1, hwreg(HW_REG_XCC_ID) ; encoding: [0x14,0xf8,0x81,0xb8] +// GFX942: s_getreg_b32 s1, hwreg(HW_REG_XCC_ID) ; encoding: [0x14,0xf8,0x81,0xb8] s_getreg_b32 s1, hwreg(20) -// GFX940: s_getreg_b32 s1, hwreg(HW_REG_SQ_PERF_SNAPSHOT_DATA) ; encoding: [0x15,0xf8,0x81,0xb8] +// GFX942: s_getreg_b32 s1, hwreg(HW_REG_SQ_PERF_SNAPSHOT_DATA) ; encoding: [0x15,0xf8,0x81,0xb8] s_getreg_b32 s1, hwreg(21) -// GFX940: s_getreg_b32 s1, hwreg(HW_REG_SQ_PERF_SNAPSHOT_DATA1) ; encoding: [0x16,0xf8,0x81,0xb8] +// GFX942: s_getreg_b32 s1, hwreg(HW_REG_SQ_PERF_SNAPSHOT_DATA1) ; encoding: [0x16,0xf8,0x81,0xb8] s_getreg_b32 s1, hwreg(22) -// GFX940: s_getreg_b32 s1, hwreg(HW_REG_SQ_PERF_SNAPSHOT_PC_LO) ; encoding: [0x17,0xf8,0x81,0xb8] +// GFX942: s_getreg_b32 s1, hwreg(HW_REG_SQ_PERF_SNAPSHOT_PC_LO) ; encoding: [0x17,0xf8,0x81,0xb8] s_getreg_b32 s1, hwreg(23) -// GFX940: s_getreg_b32 s1, hwreg(HW_REG_SQ_PERF_SNAPSHOT_PC_HI) ; encoding: [0x18,0xf8,0x81,0xb8] +// GFX942: s_getreg_b32 s1, hwreg(HW_REG_SQ_PERF_SNAPSHOT_PC_HI) ; encoding: [0x18,0xf8,0x81,0xb8] s_getreg_b32 s1, hwreg(24) -// GFX940: s_getreg_b32 s1, hwreg(25) ; encoding: [0x19,0xf8,0x81,0xb8] +// GFX942: s_getreg_b32 s1, hwreg(25) ; encoding: [0x19,0xf8,0x81,0xb8] s_getreg_b32 s1, hwreg(25) -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_mov_b64_e32 v[2:3], v[4:5] ; encoding: [0x04,0x71,0x04,0x7e] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_mov_b64_e32 v[2:3], v[4:5] ; encoding: [0x04,0x71,0x04,0x7e] v_mov_b64 v[2:3], v[4:5] -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_mov_b64_dpp v[2:3], v[4:5] row_newbcast:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x70,0x04,0x7e,0x04,0x51,0x01,0xff] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_mov_b64_dpp v[2:3], v[4:5] row_newbcast:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x70,0x04,0x7e,0x04,0x51,0x01,0xff] v_mov_b64 v[2:3], v[4:5] row_newbcast:1 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_mov_b64_e32 v[2:3], s[4:5] ; encoding: [0x04,0x70,0x04,0x7e] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_mov_b64_e32 v[2:3], s[4:5] ; encoding: [0x04,0x70,0x04,0x7e] v_mov_b64 v[2:3], s[4:5] -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_mov_b64_e32 v[2:3], 1 ; encoding: [0x81,0x70,0x04,0x7e] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_mov_b64_e32 v[2:3], 1 ; encoding: [0x81,0x70,0x04,0x7e] v_mov_b64 v[2:3], 1 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_mov_b64_e32 v[2:3], 0x64 ; encoding: [0xff,0x70,0x04,0x7e,0x64,0x00,0x00,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_mov_b64_e32 v[2:3], 0x64 ; encoding: [0xff,0x70,0x04,0x7e,0x64,0x00,0x00,0x00] v_mov_b64 v[2:3], 0x64 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9] ; encoding: [0x02,0x00,0x08,0xd2,0x04,0x0e,0x22,0x04] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9] ; encoding: [0x02,0x00,0x08,0xd2,0x04,0x0e,0x22,0x04] v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9] -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_lshl_add_u64 v[2:3], v[4:5], 0, 1 ; encoding: [0x02,0x00,0x08,0xd2,0x04,0x01,0x05,0x02] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_lshl_add_u64 v[2:3], v[4:5], 0, 1 ; encoding: [0x02,0x00,0x08,0xd2,0x04,0x01,0x05,0x02] v_lshl_add_u64 v[2:3], v[4:5], 0, 1 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_lshl_add_u64 v[2:3], v[4:5], 3, s[2:3] ; encoding: [0x02,0x00,0x08,0xd2,0x04,0x07,0x09,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_lshl_add_u64 v[2:3], v[4:5], 3, s[2:3] ; encoding: [0x02,0x00,0x08,0xd2,0x04,0x07,0x09,0x00] v_lshl_add_u64 v[2:3], v[4:5], 3, s[2:3] -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_lshl_add_u64 v[2:3], s[4:5], 4, v[2:3] ; encoding: [0x02,0x00,0x08,0xd2,0x04,0x08,0x09,0x04] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_lshl_add_u64 v[2:3], s[4:5], 4, v[2:3] ; encoding: [0x02,0x00,0x08,0xd2,0x04,0x08,0x09,0x04] v_lshl_add_u64 v[2:3], s[4:5], 4, v[2:3] // GFX90A: :[[@LINE+3]]:{{[0-9]+}}: error: invalid operand for instruction // GFX10: error: instruction not supported on this GPU -// GFX940: buffer_wbl2 sc1 ; encoding: [0x00,0x80,0xa0,0xe0,0x00,0x00,0x00,0x00] +// GFX942: buffer_wbl2 sc1 ; encoding: [0x00,0x80,0xa0,0xe0,0x00,0x00,0x00,0x00] buffer_wbl2 sc1 // GFX90A: :[[@LINE+3]]:{{[0-9]+}}: error: invalid operand for instruction // GFX10: error: instruction not supported on this GPU -// GFX940: buffer_wbl2 sc0 ; encoding: [0x00,0x40,0xa0,0xe0,0x00,0x00,0x00,0x00] +// GFX942: buffer_wbl2 sc0 ; encoding: [0x00,0x40,0xa0,0xe0,0x00,0x00,0x00,0x00] buffer_wbl2 sc0 // GFX90A: :[[@LINE+3]]:{{[0-9]+}}: error: invalid operand for instruction // GFX10: error: instruction not supported on this GPU -// GFX940: buffer_wbl2 sc0 sc1 ; encoding: [0x00,0xc0,0xa0,0xe0,0x00,0x00,0x00,0x00] +// GFX942: buffer_wbl2 sc0 sc1 ; encoding: [0x00,0xc0,0xa0,0xe0,0x00,0x00,0x00,0x00] buffer_wbl2 sc0 sc1 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: buffer_inv sc0 ; encoding: [0x00,0x40,0xa4,0xe0,0x00,0x00,0x00,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: buffer_inv sc0 ; encoding: [0x00,0x40,0xa4,0xe0,0x00,0x00,0x00,0x00] buffer_inv sc0 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: buffer_inv sc1 ; encoding: [0x00,0x80,0xa4,0xe0,0x00,0x00,0x00,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: buffer_inv sc1 ; encoding: [0x00,0x80,0xa4,0xe0,0x00,0x00,0x00,0x00] buffer_inv sc1 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: buffer_inv sc0 sc1 ; encoding: [0x00,0xc0,0xa4,0xe0,0x00,0x00,0x00,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: buffer_inv sc0 sc1 ; encoding: [0x00,0xc0,0xa4,0xe0,0x00,0x00,0x00,0x00] buffer_inv sc0 sc1 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction -// GFX940: buffer_atomic_swap v5, off, s[8:11], s3 sc0 ; encoding: [0x00,0x40,0x00,0xe1,0x00,0x05,0x02,0x03] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX942: buffer_atomic_swap v5, off, s[8:11], s3 sc0 ; encoding: [0x00,0x40,0x00,0xe1,0x00,0x05,0x02,0x03] buffer_atomic_swap v5, off, s[8:11], s3 sc0 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction -// GFX940: buffer_atomic_swap v5, off, s[8:11], s3 nt ; encoding: [0x00,0x00,0x02,0xe1,0x00,0x05,0x02,0x03] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX942: buffer_atomic_swap v5, off, s[8:11], s3 nt ; encoding: [0x00,0x00,0x02,0xe1,0x00,0x05,0x02,0x03] buffer_atomic_swap v5, off, s[8:11], s3 nt // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e32 v[4:5], v[2:3], v[4:5] ; encoding: [0x02,0x09,0x08,0x08] +// GFX942: v_fmac_f64_e32 v[4:5], v[2:3], v[4:5] ; encoding: [0x02,0x09,0x08,0x08] v_fmac_f64_e32 v[4:5], v[2:3], v[4:5] // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e32 v[254:255], v[2:3], v[4:5] ; encoding: [0x02,0x09,0xfc,0x09] +// GFX942: v_fmac_f64_e32 v[254:255], v[2:3], v[4:5] ; encoding: [0x02,0x09,0xfc,0x09] v_fmac_f64_e32 v[254:255], v[2:3], v[4:5] // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e32 v[4:5], v[254:255], v[4:5] ; encoding: [0xfe,0x09,0x08,0x08] +// GFX942: v_fmac_f64_e32 v[4:5], v[254:255], v[4:5] ; encoding: [0xfe,0x09,0x08,0x08] v_fmac_f64_e32 v[4:5], v[254:255], v[4:5] // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e32 v[4:5], flat_scratch, v[4:5] ; encoding: [0x66,0x08,0x08,0x08] +// GFX942: v_fmac_f64_e32 v[4:5], flat_scratch, v[4:5] ; encoding: [0x66,0x08,0x08,0x08] v_fmac_f64_e32 v[4:5], flat_scratch, v[4:5] // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e32 v[4:5], vcc, v[4:5] ; encoding: [0x6a,0x08,0x08,0x08] +// GFX942: v_fmac_f64_e32 v[4:5], vcc, v[4:5] ; encoding: [0x6a,0x08,0x08,0x08] v_fmac_f64_e32 v[4:5], vcc, v[4:5] // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e32 v[4:5], exec, v[4:5] ; encoding: [0x7e,0x08,0x08,0x08] +// GFX942: v_fmac_f64_e32 v[4:5], exec, v[4:5] ; encoding: [0x7e,0x08,0x08,0x08] v_fmac_f64_e32 v[4:5], exec, v[4:5] // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e32 v[4:5], 0, v[4:5] ; encoding: [0x80,0x08,0x08,0x08] +// GFX942: v_fmac_f64_e32 v[4:5], 0, v[4:5] ; encoding: [0x80,0x08,0x08,0x08] v_fmac_f64_e32 v[4:5], 0, v[4:5] // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e32 v[4:5], -1, v[4:5] ; encoding: [0xc1,0x08,0x08,0x08] +// GFX942: v_fmac_f64_e32 v[4:5], -1, v[4:5] ; encoding: [0xc1,0x08,0x08,0x08] v_fmac_f64_e32 v[4:5], -1, v[4:5] // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e32 v[4:5], 0.5, v[4:5] ; encoding: [0xf0,0x08,0x08,0x08] +// GFX942: v_fmac_f64_e32 v[4:5], 0.5, v[4:5] ; encoding: [0xf0,0x08,0x08,0x08] v_fmac_f64_e32 v[4:5], 0.5, v[4:5] // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e32 v[4:5], -4.0, v[4:5] ; encoding: [0xf7,0x08,0x08,0x08] +// GFX942: v_fmac_f64_e32 v[4:5], -4.0, v[4:5] ; encoding: [0xf7,0x08,0x08,0x08] v_fmac_f64_e32 v[4:5], -4.0, v[4:5] // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e32 v[4:5], 0xaf123456, v[4:5] ; encoding: [0xff,0x08,0x08,0x08,0x56,0x34,0x12,0xaf] +// GFX942: v_fmac_f64_e32 v[4:5], 0xaf123456, v[4:5] ; encoding: [0xff,0x08,0x08,0x08,0x56,0x34,0x12,0xaf] v_fmac_f64_e32 v[4:5], 0xaf123456, v[4:5] // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e32 v[4:5], 0x3f717273, v[4:5] ; encoding: [0xff,0x08,0x08,0x08,0x73,0x72,0x71,0x3f] +// GFX942: v_fmac_f64_e32 v[4:5], 0x3f717273, v[4:5] ; encoding: [0xff,0x08,0x08,0x08,0x73,0x72,0x71,0x3f] v_fmac_f64_e32 v[4:5], 0x3f717273, v[4:5] // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e32 v[4:5], v[2:3], v[254:255] ; encoding: [0x02,0xfd,0x09,0x08] +// GFX942: v_fmac_f64_e32 v[4:5], v[2:3], v[254:255] ; encoding: [0x02,0xfd,0x09,0x08] v_fmac_f64_e32 v[4:5], v[2:3], v[254:255] // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0x02,0x11,0x02,0x00] +// GFX942: v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0x02,0x11,0x02,0x00] v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e64 v[254:255], v[2:3], v[8:9] ; encoding: [0xfe,0x00,0x04,0xd1,0x02,0x11,0x02,0x00] +// GFX942: v_fmac_f64_e64 v[254:255], v[2:3], v[8:9] ; encoding: [0xfe,0x00,0x04,0xd1,0x02,0x11,0x02,0x00] v_fmac_f64_e64 v[254:255], v[2:3], v[8:9] // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e64 v[4:5], v[254:255], v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0xfe,0x11,0x02,0x00] +// GFX942: v_fmac_f64_e64 v[4:5], v[254:255], v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0xfe,0x11,0x02,0x00] v_fmac_f64_e64 v[4:5], v[254:255], v[8:9] // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e64 v[4:5], flat_scratch, v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0x66,0x10,0x02,0x00] +// GFX942: v_fmac_f64_e64 v[4:5], flat_scratch, v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0x66,0x10,0x02,0x00] v_fmac_f64_e64 v[4:5], flat_scratch, v[8:9] // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e64 v[4:5], vcc, v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0x6a,0x10,0x02,0x00] +// GFX942: v_fmac_f64_e64 v[4:5], vcc, v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0x6a,0x10,0x02,0x00] v_fmac_f64_e64 v[4:5], vcc, v[8:9] // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e64 v[4:5], exec, v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0x7e,0x10,0x02,0x00] +// GFX942: v_fmac_f64_e64 v[4:5], exec, v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0x7e,0x10,0x02,0x00] v_fmac_f64_e64 v[4:5], exec, v[8:9] // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e64 v[4:5], 0, v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0x80,0x10,0x02,0x00] +// GFX942: v_fmac_f64_e64 v[4:5], 0, v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0x80,0x10,0x02,0x00] v_fmac_f64_e64 v[4:5], 0, v[8:9] // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e64 v[4:5], -1, v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0xc1,0x10,0x02,0x00] +// GFX942: v_fmac_f64_e64 v[4:5], -1, v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0xc1,0x10,0x02,0x00] v_fmac_f64_e64 v[4:5], -1, v[8:9] // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e64 v[4:5], 0.5, v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0xf0,0x10,0x02,0x00] +// GFX942: v_fmac_f64_e64 v[4:5], 0.5, v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0xf0,0x10,0x02,0x00] v_fmac_f64_e64 v[4:5], 0.5, v[8:9] // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e64 v[4:5], -4.0, v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0xf7,0x10,0x02,0x00] +// GFX942: v_fmac_f64_e64 v[4:5], -4.0, v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0xf7,0x10,0x02,0x00] v_fmac_f64_e64 v[4:5], -4.0, v[8:9] // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e64 v[4:5], v[2:3], v[254:255] ; encoding: [0x04,0x00,0x04,0xd1,0x02,0xfd,0x03,0x00] +// GFX942: v_fmac_f64_e64 v[4:5], v[2:3], v[254:255] ; encoding: [0x04,0x00,0x04,0xd1,0x02,0xfd,0x03,0x00] v_fmac_f64_e64 v[4:5], v[2:3], v[254:255] // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e64 v[4:5], v[2:3], flat_scratch ; encoding: [0x04,0x00,0x04,0xd1,0x02,0xcd,0x00,0x00] +// GFX942: v_fmac_f64_e64 v[4:5], v[2:3], flat_scratch ; encoding: [0x04,0x00,0x04,0xd1,0x02,0xcd,0x00,0x00] v_fmac_f64_e64 v[4:5], v[2:3], flat_scratch // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e64 v[4:5], v[2:3], vcc ; encoding: [0x04,0x00,0x04,0xd1,0x02,0xd5,0x00,0x00] +// GFX942: v_fmac_f64_e64 v[4:5], v[2:3], vcc ; encoding: [0x04,0x00,0x04,0xd1,0x02,0xd5,0x00,0x00] v_fmac_f64_e64 v[4:5], v[2:3], vcc // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e64 v[4:5], v[2:3], exec ; encoding: [0x04,0x00,0x04,0xd1,0x02,0xfd,0x00,0x00] +// GFX942: v_fmac_f64_e64 v[4:5], v[2:3], exec ; encoding: [0x04,0x00,0x04,0xd1,0x02,0xfd,0x00,0x00] v_fmac_f64_e64 v[4:5], v[2:3], exec // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e64 v[4:5], v[2:3], 0 ; encoding: [0x04,0x00,0x04,0xd1,0x02,0x01,0x01,0x00] +// GFX942: v_fmac_f64_e64 v[4:5], v[2:3], 0 ; encoding: [0x04,0x00,0x04,0xd1,0x02,0x01,0x01,0x00] v_fmac_f64_e64 v[4:5], v[2:3], 0 // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e64 v[4:5], v[2:3], -1 ; encoding: [0x04,0x00,0x04,0xd1,0x02,0x83,0x01,0x00] +// GFX942: v_fmac_f64_e64 v[4:5], v[2:3], -1 ; encoding: [0x04,0x00,0x04,0xd1,0x02,0x83,0x01,0x00] v_fmac_f64_e64 v[4:5], v[2:3], -1 // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e64 v[4:5], v[2:3], 0.5 ; encoding: [0x04,0x00,0x04,0xd1,0x02,0xe1,0x01,0x00] +// GFX942: v_fmac_f64_e64 v[4:5], v[2:3], 0.5 ; encoding: [0x04,0x00,0x04,0xd1,0x02,0xe1,0x01,0x00] v_fmac_f64_e64 v[4:5], v[2:3], 0.5 // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e64 v[4:5], v[2:3], -4.0 ; encoding: [0x04,0x00,0x04,0xd1,0x02,0xef,0x01,0x00] +// GFX942: v_fmac_f64_e64 v[4:5], v[2:3], -4.0 ; encoding: [0x04,0x00,0x04,0xd1,0x02,0xef,0x01,0x00] v_fmac_f64_e64 v[4:5], v[2:3], -4.0 // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e64 v[4:5], -v[2:3], v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0x02,0x11,0x02,0x20] +// GFX942: v_fmac_f64_e64 v[4:5], -v[2:3], v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0x02,0x11,0x02,0x20] v_fmac_f64_e64 v[4:5], -v[2:3], v[8:9] // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e64 v[4:5], v[2:3], -v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0x02,0x11,0x02,0x40] +// GFX942: v_fmac_f64_e64 v[4:5], v[2:3], -v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0x02,0x11,0x02,0x40] v_fmac_f64_e64 v[4:5], v[2:3], -v[8:9] // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e64 v[4:5], -v[2:3], -v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0x02,0x11,0x02,0x60] +// GFX942: v_fmac_f64_e64 v[4:5], -v[2:3], -v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0x02,0x11,0x02,0x60] v_fmac_f64_e64 v[4:5], -v[2:3], -v[8:9] // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e64 v[4:5], |v[2:3]|, v[8:9] ; encoding: [0x04,0x01,0x04,0xd1,0x02,0x11,0x02,0x00] +// GFX942: v_fmac_f64_e64 v[4:5], |v[2:3]|, v[8:9] ; encoding: [0x04,0x01,0x04,0xd1,0x02,0x11,0x02,0x00] v_fmac_f64_e64 v[4:5], |v[2:3]|, v[8:9] // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e64 v[4:5], v[2:3], |v[8:9]| ; encoding: [0x04,0x02,0x04,0xd1,0x02,0x11,0x02,0x00] +// GFX942: v_fmac_f64_e64 v[4:5], v[2:3], |v[8:9]| ; encoding: [0x04,0x02,0x04,0xd1,0x02,0x11,0x02,0x00] v_fmac_f64_e64 v[4:5], v[2:3], |v[8:9]| // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e64 v[4:5], |v[2:3]|, |v[8:9]| ; encoding: [0x04,0x03,0x04,0xd1,0x02,0x11,0x02,0x00] +// GFX942: v_fmac_f64_e64 v[4:5], |v[2:3]|, |v[8:9]| ; encoding: [0x04,0x03,0x04,0xd1,0x02,0x11,0x02,0x00] v_fmac_f64_e64 v[4:5], |v[2:3]|, |v[8:9]| // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] clamp ; encoding: [0x04,0x80,0x04,0xd1,0x02,0x11,0x02,0x00] +// GFX942: v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] clamp ; encoding: [0x04,0x80,0x04,0xd1,0x02,0x11,0x02,0x00] v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] clamp // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] mul:2 ; encoding: [0x04,0x00,0x04,0xd1,0x02,0x11,0x02,0x08] +// GFX942: v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] mul:2 ; encoding: [0x04,0x00,0x04,0xd1,0x02,0x11,0x02,0x08] v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] mul:2 // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] mul:4 ; encoding: [0x04,0x00,0x04,0xd1,0x02,0x11,0x02,0x10] +// GFX942: v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] mul:4 ; encoding: [0x04,0x00,0x04,0xd1,0x02,0x11,0x02,0x10] v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] mul:4 // GFX10: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] div:2 ; encoding: [0x04,0x00,0x04,0xd1,0x02,0x11,0x02,0x18] +// GFX942: v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] div:2 ; encoding: [0x04,0x00,0x04,0xd1,0x02,0x11,0x02,0x18] v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] div:2 // GFX90A: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmamk_f32 v0, v2, 0x42c80000, v3 ; encoding: [0x02,0x07,0x00,0x2e,0x00,0x00,0xc8,0x42] +// GFX942: v_fmamk_f32 v0, v2, 0x42c80000, v3 ; encoding: [0x02,0x07,0x00,0x2e,0x00,0x00,0xc8,0x42] v_fmamk_f32 v0, v2, 100.0, v3 // GFX90A: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_fmaak_f32 v0, v2, v3, 0x42c80000 ; encoding: [0x02,0x07,0x00,0x30,0x00,0x00,0xc8,0x42] +// GFX942: v_fmaak_f32 v0, v2, v3, 0x42c80000 ; encoding: [0x02,0x07,0x00,0x30,0x00,0x00,0xc8,0x42] v_fmaak_f32 v0, v2, v3, 100.0 // GFX90A: :[[@LINE+3]]:{{[0-9]+}}: error: invalid operand for instruction // GFX10: error: instruction not supported on this GPU -// GFX940: global_atomic_add_f32 v0, v[0:1], v2, off sc0 sc1 ; encoding: [0x00,0x80,0x35,0xdf,0x00,0x02,0x7f,0x00] +// GFX942: global_atomic_add_f32 v0, v[0:1], v2, off sc0 sc1 ; encoding: [0x00,0x80,0x35,0xdf,0x00,0x02,0x7f,0x00] global_atomic_add_f32 v0, v[0:1], v2, off sc0 sc1 // GFX90A: :[[@LINE+3]]:{{[0-9]+}}: error: invalid operand for instruction // GFX10: error: instruction not supported on this GPU -// GFX940: global_atomic_add_f32 v[0:1], v2, off sc1 ; encoding: [0x00,0x80,0x34,0xdf,0x00,0x02,0x7f,0x00] +// GFX942: global_atomic_add_f32 v[0:1], v2, off sc1 ; encoding: [0x00,0x80,0x34,0xdf,0x00,0x02,0x7f,0x00] global_atomic_add_f32 v[0:1], v2, off sc1 // GFX90A: :[[@LINE+3]]:{{[0-9]+}}: error: invalid operand for instruction // GFX10: error: instruction not supported on this GPU -// GFX940: global_atomic_add_f32 v0, v2, s[0:1] sc1 ; encoding: [0x00,0x80,0x34,0xdf,0x00,0x02,0x00,0x00] +// GFX942: global_atomic_add_f32 v0, v2, s[0:1] sc1 ; encoding: [0x00,0x80,0x34,0xdf,0x00,0x02,0x00,0x00] global_atomic_add_f32 v0, v2, s[0:1] sc1 // GFX90A: :[[@LINE+3]]:{{[0-9]+}}: error: invalid operand for instruction // GFX10: error: instruction not supported on this GPU -// GFX940: global_atomic_add_f32 v1, v0, v2, s[0:1] sc0 sc1 ; encoding: [0x00,0x80,0x35,0xdf,0x00,0x02,0x00,0x01] +// GFX942: global_atomic_add_f32 v1, v0, v2, s[0:1] sc0 sc1 ; encoding: [0x00,0x80,0x35,0xdf,0x00,0x02,0x00,0x01] global_atomic_add_f32 v1, v0, v2, s[0:1] sc0 sc1 // GFX90A: :[[@LINE+3]]:{{[0-9]+}}: error: invalid operand for instruction // GFX10: error: instruction not supported on this GPU -// GFX940: global_atomic_pk_add_f16 v0, v[0:1], v2, off sc0 sc1 ; encoding: [0x00,0x80,0x39,0xdf,0x00,0x02,0x7f,0x00] +// GFX942: global_atomic_pk_add_f16 v0, v[0:1], v2, off sc0 sc1 ; encoding: [0x00,0x80,0x39,0xdf,0x00,0x02,0x7f,0x00] global_atomic_pk_add_f16 v0, v[0:1], v2, off sc0 sc1 // GFX90A: :[[@LINE+3]]:{{[0-9]+}}: error: invalid operand for instruction // GFX10: error: instruction not supported on this GPU -// GFX940: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 sc1 ; encoding: [0x00,0x00,0x3d,0xdf,0x00,0x02,0x00,0x00] +// GFX942: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 sc1 ; encoding: [0x00,0x00,0x3d,0xdf,0x00,0x02,0x00,0x00] flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 sc1 // GFX90A: :[[@LINE+3]]:{{[0-9]+}}: error: invalid operand for instruction // GFX10: error: instruction not supported on this GPU -// GFX940: flat_atomic_add_f64 v[0:1], v[2:3] sc1 ; encoding: [0x00,0x00,0x3c,0xdf,0x00,0x02,0x00,0x00] +// GFX942: flat_atomic_add_f64 v[0:1], v[2:3] sc1 ; encoding: [0x00,0x00,0x3c,0xdf,0x00,0x02,0x00,0x00] flat_atomic_add_f64 v[0:1], v[2:3] sc1 // GFX90A: :[[@LINE+3]]:{{[0-9]+}}: error: invalid operand for instruction // GFX10: error: instruction not supported on this GPU -// GFX940: flat_atomic_min_f64 v[0:1], v[2:3] sc1 ; encoding: [0x00,0x00,0x40,0xdf,0x00,0x02,0x00,0x00] +// GFX942: flat_atomic_min_f64 v[0:1], v[2:3] sc1 ; encoding: [0x00,0x00,0x40,0xdf,0x00,0x02,0x00,0x00] flat_atomic_min_f64 v[0:1], v[2:3] sc1 // GFX90A: :[[@LINE+3]]:{{[0-9]+}}: error: invalid operand for instruction // GFX10: error: instruction not supported on this GPU -// GFX940: flat_atomic_max_f64 v[0:1], v[2:3] sc1 ; encoding: [0x00,0x00,0x44,0xdf,0x00,0x02,0x00,0x00] +// GFX942: flat_atomic_max_f64 v[0:1], v[2:3] sc1 ; encoding: [0x00,0x00,0x44,0xdf,0x00,0x02,0x00,0x00] flat_atomic_max_f64 v[0:1], v[2:3] sc1 // GFX90A: :[[@LINE+3]]:{{[0-9]+}}: error: invalid operand for instruction // GFX10: error: instruction not supported on this GPU -// GFX940: global_atomic_add_f64 v[0:1], v[2:3], off sc1 ; encoding: [0x00,0x80,0x3c,0xdf,0x00,0x02,0x7f,0x00] +// GFX942: global_atomic_add_f64 v[0:1], v[2:3], off sc1 ; encoding: [0x00,0x80,0x3c,0xdf,0x00,0x02,0x7f,0x00] global_atomic_add_f64 v[0:1], v[2:3], off sc1 // GFX90A: :[[@LINE+3]]:{{[0-9]+}}: error: invalid operand for instruction // GFX10: error: instruction not supported on this GPU -// GFX940: global_atomic_min_f64 v[0:1], v[2:3], off sc1 ; encoding: [0x00,0x80,0x40,0xdf,0x00,0x02,0x7f,0x00] +// GFX942: global_atomic_min_f64 v[0:1], v[2:3], off sc1 ; encoding: [0x00,0x80,0x40,0xdf,0x00,0x02,0x7f,0x00] global_atomic_min_f64 v[0:1], v[2:3], off sc1 // GFX90A: :[[@LINE+3]]:{{[0-9]+}}: error: invalid operand for instruction // GFX10: error: instruction not supported on this GPU -// GFX940: global_atomic_max_f64 v[0:1], v[2:3], off sc1 ; encoding: [0x00,0x80,0x44,0xdf,0x00,0x02,0x7f,0x00] +// GFX942: global_atomic_max_f64 v[0:1], v[2:3], off sc1 ; encoding: [0x00,0x80,0x44,0xdf,0x00,0x02,0x7f,0x00] global_atomic_max_f64 v[0:1], v[2:3], off sc1 // GFX90A: :[[@LINE+3]]:{{[0-9]+}}: error: invalid operand for instruction // GFX10: error: instruction not supported on this GPU -// GFX940: buffer_atomic_add_f32 v4, off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x34,0xe1,0x00,0x04,0x02,0x03] +// GFX942: buffer_atomic_add_f32 v4, off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x34,0xe1,0x00,0x04,0x02,0x03] buffer_atomic_add_f32 v4, off, s[8:11], s3 sc1 // GFX90A: :[[@LINE+3]]:{{[0-9]+}}: error: invalid operand for instruction // GFX10: error: instruction not supported on this GPU -// GFX940: buffer_atomic_pk_add_f16 v4, off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x38,0xe1,0x00,0x04,0x02,0x03] +// GFX942: buffer_atomic_pk_add_f16 v4, off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x38,0xe1,0x00,0x04,0x02,0x03] buffer_atomic_pk_add_f16 v4, off, s[8:11], s3 sc1 // GFX90A: :[[@LINE+3]]:{{[0-9]+}}: error: invalid operand for instruction // GFX10: error: instruction not supported on this GPU -// GFX940: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x3c,0xe1,0x00,0x04,0x02,0x03] +// GFX942: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x3c,0xe1,0x00,0x04,0x02,0x03] buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 sc1 // GFX90A: :[[@LINE+3]]:{{[0-9]+}}: error: invalid operand for instruction // GFX10: error: instruction not supported on this GPU -// GFX940: buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x44,0xe1,0x00,0x04,0x02,0x03] +// GFX942: buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x44,0xe1,0x00,0x04,0x02,0x03] buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 sc1 // GFX90A: :[[@LINE+3]]:{{[0-9]+}}: error: invalid operand for instruction // GFX10: error: instruction not supported on this GPU -// GFX940: buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x40,0xe1,0x00,0x04,0x02,0x03] +// GFX942: buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x40,0xe1,0x00,0x04,0x02,0x03] buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 sc1 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_f32_bf8_e32 v1, s3 ; encoding: [0x03,0xaa,0x02,0x7e] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_f32_bf8_e32 v1, s3 ; encoding: [0x03,0xaa,0x02,0x7e] v_cvt_f32_bf8 v1, s3 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_f32_bf8_e32 v1, 3 ; encoding: [0x83,0xaa,0x02,0x7e] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_f32_bf8_e32 v1, 3 ; encoding: [0x83,0xaa,0x02,0x7e] v_cvt_f32_bf8 v1, 3 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_f32_bf8_e32 v1, v3 ; encoding: [0x03,0xab,0x02,0x7e] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_f32_bf8_e32 v1, v3 ; encoding: [0x03,0xab,0x02,0x7e] v_cvt_f32_bf8 v1, v3 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_f32_bf8_sdwa v1, s3 src0_sel:BYTE_1 ; encoding: [0xf9,0xaa,0x02,0x7e,0x03,0x06,0x81,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_f32_bf8_sdwa v1, s3 src0_sel:BYTE_1 ; encoding: [0xf9,0xaa,0x02,0x7e,0x03,0x06,0x81,0x00] v_cvt_f32_bf8 v1, s3 src0_sel:BYTE_1 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_f32_bf8_dpp v1, v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xaa,0x02,0x7e,0x03,0x58,0x00,0xff] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_f32_bf8_dpp v1, v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xaa,0x02,0x7e,0x03,0x58,0x00,0xff] v_cvt_f32_bf8 v1, v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_f32_bf8_e64 v1, s3 mul:2 ; encoding: [0x01,0x00,0x95,0xd1,0x03,0x00,0x00,0x08] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_f32_bf8_e64 v1, s3 mul:2 ; encoding: [0x01,0x00,0x95,0xd1,0x03,0x00,0x00,0x08] v_cvt_f32_bf8 v1, s3 mul:2 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_f32_bf8_sdwa v1, s3 clamp mul:2 src0_sel:BYTE_1 ; encoding: [0xf9,0xaa,0x02,0x7e,0x03,0x66,0x81,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_f32_bf8_sdwa v1, s3 clamp mul:2 src0_sel:BYTE_1 ; encoding: [0xf9,0xaa,0x02,0x7e,0x03,0x66,0x81,0x00] v_cvt_f32_bf8 v1, s3 clamp mul:2 src0_sel:BYTE_1 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_f32_bf8_e64 v1, s3 clamp ; encoding: [0x01,0x80,0x95,0xd1,0x03,0x00,0x00,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_f32_bf8_e64 v1, s3 clamp ; encoding: [0x01,0x80,0x95,0xd1,0x03,0x00,0x00,0x00] v_cvt_f32_bf8 v1, s3 clamp -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_f32_fp8_e32 v1, s3 ; encoding: [0x03,0xa8,0x02,0x7e] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_f32_fp8_e32 v1, s3 ; encoding: [0x03,0xa8,0x02,0x7e] v_cvt_f32_fp8 v1, s3 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_f32_fp8_e32 v1, 3 ; encoding: [0x83,0xa8,0x02,0x7e] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_f32_fp8_e32 v1, 3 ; encoding: [0x83,0xa8,0x02,0x7e] v_cvt_f32_fp8 v1, 3 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_f32_fp8_e32 v1, v3 ; encoding: [0x03,0xa9,0x02,0x7e] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_f32_fp8_e32 v1, v3 ; encoding: [0x03,0xa9,0x02,0x7e] v_cvt_f32_fp8 v1, v3 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_f32_fp8_sdwa v1, s3 src0_sel:BYTE_1 ; encoding: [0xf9,0xa8,0x02,0x7e,0x03,0x06,0x81,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_f32_fp8_sdwa v1, s3 src0_sel:BYTE_1 ; encoding: [0xf9,0xa8,0x02,0x7e,0x03,0x06,0x81,0x00] v_cvt_f32_fp8 v1, s3 src0_sel:BYTE_1 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_f32_fp8_dpp v1, v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xa8,0x02,0x7e,0x03,0x58,0x00,0xff] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_f32_fp8_dpp v1, v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xa8,0x02,0x7e,0x03,0x58,0x00,0xff] v_cvt_f32_fp8 v1, v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_f32_fp8_e64 v1, s3 mul:2 ; encoding: [0x01,0x00,0x94,0xd1,0x03,0x00,0x00,0x08] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_f32_fp8_e64 v1, s3 mul:2 ; encoding: [0x01,0x00,0x94,0xd1,0x03,0x00,0x00,0x08] v_cvt_f32_fp8 v1, s3 mul:2 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_f32_fp8_sdwa v1, s3 clamp mul:2 src0_sel:BYTE_1 ; encoding: [0xf9,0xa8,0x02,0x7e,0x03,0x66,0x81,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_f32_fp8_sdwa v1, s3 clamp mul:2 src0_sel:BYTE_1 ; encoding: [0xf9,0xa8,0x02,0x7e,0x03,0x66,0x81,0x00] v_cvt_f32_fp8 v1, s3 clamp mul:2 src0_sel:BYTE_1 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_f32_fp8_e64 v1, s3 clamp ; encoding: [0x01,0x80,0x94,0xd1,0x03,0x00,0x00,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_f32_fp8_e64 v1, s3 clamp ; encoding: [0x01,0x80,0x94,0xd1,0x03,0x00,0x00,0x00] v_cvt_f32_fp8 v1, s3 clamp -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_f32_fp8_sdwa v1, 3 src0_sel:BYTE_1 ; encoding: [0xf9,0xa8,0x02,0x7e,0x83,0x06,0x81,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_f32_fp8_sdwa v1, 3 src0_sel:BYTE_1 ; encoding: [0xf9,0xa8,0x02,0x7e,0x83,0x06,0x81,0x00] v_cvt_f32_fp8 v1, 3 src0_sel:BYTE_1 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_pk_f32_bf8_e32 v[2:3], s3 ; encoding: [0x03,0xae,0x04,0x7e] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_pk_f32_bf8_e32 v[2:3], s3 ; encoding: [0x03,0xae,0x04,0x7e] v_cvt_pk_f32_bf8 v[2:3], s3 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_pk_f32_bf8_e32 v[2:3], 3 ; encoding: [0x83,0xae,0x04,0x7e] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_pk_f32_bf8_e32 v[2:3], 3 ; encoding: [0x83,0xae,0x04,0x7e] v_cvt_pk_f32_bf8 v[2:3], 3 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_pk_f32_bf8_e32 v[2:3], v3 ; encoding: [0x03,0xaf,0x04,0x7e] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_pk_f32_bf8_e32 v[2:3], v3 ; encoding: [0x03,0xaf,0x04,0x7e] v_cvt_pk_f32_bf8 v[2:3], v3 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_pk_f32_bf8_sdwa v[2:3], s3 src0_sel:WORD_1 ; encoding: [0xf9,0xae,0x04,0x7e,0x03,0x06,0x85,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_pk_f32_bf8_sdwa v[2:3], s3 src0_sel:WORD_1 ; encoding: [0xf9,0xae,0x04,0x7e,0x03,0x06,0x85,0x00] v_cvt_pk_f32_bf8 v[2:3], s3 src0_sel:WORD_1 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_pk_f32_bf8_dpp v[0:1], v3 row_newbcast:3 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xae,0x00,0x7e,0x03,0x53,0x01,0xff] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_pk_f32_bf8_dpp v[0:1], v3 row_newbcast:3 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xae,0x00,0x7e,0x03,0x53,0x01,0xff] v_cvt_pk_f32_bf8 v[0:1], v3 row_newbcast:3 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_pk_f32_bf8_e64 v[2:3], s3 mul:2 ; encoding: [0x02,0x00,0x97,0xd1,0x03,0x00,0x00,0x08] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_pk_f32_bf8_e64 v[2:3], s3 mul:2 ; encoding: [0x02,0x00,0x97,0xd1,0x03,0x00,0x00,0x08] v_cvt_pk_f32_bf8 v[2:3], s3 mul:2 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_pk_f32_bf8_sdwa v[2:3], s3 clamp mul:2 src0_sel:WORD_1 ; encoding: [0xf9,0xae,0x04,0x7e,0x03,0x66,0x85,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_pk_f32_bf8_sdwa v[2:3], s3 clamp mul:2 src0_sel:WORD_1 ; encoding: [0xf9,0xae,0x04,0x7e,0x03,0x66,0x85,0x00] v_cvt_pk_f32_bf8 v[2:3], s3 clamp mul:2 src0_sel:WORD_1 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_pk_f32_bf8_e64 v[2:3], s3 clamp ; encoding: [0x02,0x80,0x97,0xd1,0x03,0x00,0x00,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_pk_f32_bf8_e64 v[2:3], s3 clamp ; encoding: [0x02,0x80,0x97,0xd1,0x03,0x00,0x00,0x00] v_cvt_pk_f32_bf8 v[2:3], s3 clamp -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_pk_f32_fp8_e32 v[2:3], s3 ; encoding: [0x03,0xac,0x04,0x7e] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_pk_f32_fp8_e32 v[2:3], s3 ; encoding: [0x03,0xac,0x04,0x7e] v_cvt_pk_f32_fp8 v[2:3], s3 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_pk_f32_fp8_e32 v[2:3], 3 ; encoding: [0x83,0xac,0x04,0x7e] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_pk_f32_fp8_e32 v[2:3], 3 ; encoding: [0x83,0xac,0x04,0x7e] v_cvt_pk_f32_fp8 v[2:3], 3 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_pk_f32_fp8_e32 v[2:3], v3 ; encoding: [0x03,0xad,0x04,0x7e] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_pk_f32_fp8_e32 v[2:3], v3 ; encoding: [0x03,0xad,0x04,0x7e] v_cvt_pk_f32_fp8 v[2:3], v3 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_pk_f32_fp8_sdwa v[2:3], s3 src0_sel:WORD_1 ; encoding: [0xf9,0xac,0x04,0x7e,0x03,0x06,0x85,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_pk_f32_fp8_sdwa v[2:3], s3 src0_sel:WORD_1 ; encoding: [0xf9,0xac,0x04,0x7e,0x03,0x06,0x85,0x00] v_cvt_pk_f32_fp8 v[2:3], s3 src0_sel:WORD_1 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_pk_f32_fp8_sdwa v[2:3], 3 src0_sel:WORD_1 ; encoding: [0xf9,0xac,0x04,0x7e,0x83,0x06,0x85,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_pk_f32_fp8_sdwa v[2:3], 3 src0_sel:WORD_1 ; encoding: [0xf9,0xac,0x04,0x7e,0x83,0x06,0x85,0x00] v_cvt_pk_f32_fp8 v[2:3], 3 src0_sel:WORD_1 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_pk_f32_fp8_dpp v[0:1], v3 row_newbcast:3 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xac,0x00,0x7e,0x03,0x53,0x01,0xff] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_pk_f32_fp8_dpp v[0:1], v3 row_newbcast:3 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xac,0x00,0x7e,0x03,0x53,0x01,0xff] v_cvt_pk_f32_fp8 v[0:1], v3 row_newbcast:3 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_pk_f32_fp8_e64 v[2:3], s3 mul:2 ; encoding: [0x02,0x00,0x96,0xd1,0x03,0x00,0x00,0x08] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_pk_f32_fp8_e64 v[2:3], s3 mul:2 ; encoding: [0x02,0x00,0x96,0xd1,0x03,0x00,0x00,0x08] v_cvt_pk_f32_fp8 v[2:3], s3 mul:2 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_pk_f32_fp8_sdwa v[2:3], s3 clamp mul:2 src0_sel:WORD_1 ; encoding: [0xf9,0xac,0x04,0x7e,0x03,0x66,0x85,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_pk_f32_fp8_sdwa v[2:3], s3 clamp mul:2 src0_sel:WORD_1 ; encoding: [0xf9,0xac,0x04,0x7e,0x03,0x66,0x85,0x00] v_cvt_pk_f32_fp8 v[2:3], s3 clamp mul:2 src0_sel:WORD_1 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_pk_f32_fp8_e64 v[2:3], s3 clamp ; encoding: [0x02,0x80,0x96,0xd1,0x03,0x00,0x00,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_pk_f32_fp8_e64 v[2:3], s3 clamp ; encoding: [0x02,0x80,0x96,0xd1,0x03,0x00,0x00,0x00] v_cvt_pk_f32_fp8 v[2:3], s3 clamp -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_pk_bf8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa3,0xd2,0x02,0x07,0x02,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_pk_bf8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa3,0xd2,0x02,0x07,0x02,0x00] v_cvt_pk_bf8_f32 v1, v2, v3 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_pk_bf8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0xa3,0xd2,0x02,0x07,0x02,0x20] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_pk_bf8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0xa3,0xd2,0x02,0x07,0x02,0x20] v_cvt_pk_bf8_f32 v1, -v2, |v3| -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_pk_bf8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa3,0xd2,0x02,0x06,0x01,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_pk_bf8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa3,0xd2,0x02,0x06,0x01,0x00] v_cvt_pk_bf8_f32 v1, s2, 3 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_pk_bf8_f32 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0xa3,0xd2,0x02,0x07,0x02,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_pk_bf8_f32 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0xa3,0xd2,0x02,0x07,0x02,0x00] v_cvt_pk_bf8_f32 v1, v2, v3 op_sel:[0,0,1] -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_pk_fp8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa2,0xd2,0x02,0x07,0x02,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_pk_fp8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa2,0xd2,0x02,0x07,0x02,0x00] v_cvt_pk_fp8_f32 v1, v2, v3 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_pk_fp8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0xa2,0xd2,0x02,0x07,0x02,0x20] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_pk_fp8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0xa2,0xd2,0x02,0x07,0x02,0x20] v_cvt_pk_fp8_f32 v1, -v2, |v3| -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_pk_fp8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa2,0xd2,0x02,0x06,0x01,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_pk_fp8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa2,0xd2,0x02,0x06,0x01,0x00] v_cvt_pk_fp8_f32 v1, s2, 3 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_pk_fp8_f32 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0xa2,0xd2,0x02,0x07,0x02,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_pk_fp8_f32 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0xa2,0xd2,0x02,0x07,0x02,0x00] v_cvt_pk_fp8_f32 v1, v2, v3 op_sel:[0,0,1] -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_sr_bf8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa5,0xd2,0x02,0x07,0x02,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_sr_bf8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa5,0xd2,0x02,0x07,0x02,0x00] v_cvt_sr_bf8_f32 v1, v2, v3 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_sr_bf8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa5,0xd2,0x02,0x06,0x01,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_sr_bf8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa5,0xd2,0x02,0x06,0x01,0x00] v_cvt_sr_bf8_f32 v1, s2, 3 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_sr_bf8_f32 v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0xa5,0xd2,0x02,0x07,0x02,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_sr_bf8_f32 v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0xa5,0xd2,0x02,0x07,0x02,0x00] v_cvt_sr_bf8_f32 v1, v2, v3 op_sel:[0,0,1,1] -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_sr_bf8_f32 v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0xa5,0xd2,0x02,0x07,0x02,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_sr_bf8_f32 v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0xa5,0xd2,0x02,0x07,0x02,0x00] v_cvt_sr_bf8_f32 v1, v2, v3 op_sel:[0,0,0,1] -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_sr_bf8_f32 v1, -|s2|, v3 ; encoding: [0x01,0x01,0xa5,0xd2,0x02,0x06,0x02,0x20] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_sr_bf8_f32 v1, -|s2|, v3 ; encoding: [0x01,0x01,0xa5,0xd2,0x02,0x06,0x02,0x20] v_cvt_sr_bf8_f32 v1, -|s2|, v3 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_sr_fp8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa4,0xd2,0x02,0x07,0x02,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_sr_fp8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa4,0xd2,0x02,0x07,0x02,0x00] v_cvt_sr_fp8_f32 v1, v2, v3 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_sr_fp8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa4,0xd2,0x02,0x06,0x01,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_sr_fp8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa4,0xd2,0x02,0x06,0x01,0x00] v_cvt_sr_fp8_f32 v1, s2, 3 -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_sr_fp8_f32 v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0xa4,0xd2,0x02,0x07,0x02,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_sr_fp8_f32 v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0xa4,0xd2,0x02,0x07,0x02,0x00] v_cvt_sr_fp8_f32 v1, v2, v3 op_sel:[0,0,1,1] -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_sr_fp8_f32 v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0xa4,0xd2,0x02,0x07,0x02,0x00] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_sr_fp8_f32 v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0xa4,0xd2,0x02,0x07,0x02,0x00] v_cvt_sr_fp8_f32 v1, v2, v3 op_sel:[0,0,0,1] -// NOT-GFX940: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX940: v_cvt_sr_fp8_f32 v1, -|s2|, v3 ; encoding: [0x01,0x01,0xa4,0xd2,0x02,0x06,0x02,0x20] +// NOT-GFX942: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: v_cvt_sr_fp8_f32 v1, -|s2|, v3 ; encoding: [0x01,0x01,0xa4,0xd2,0x02,0x06,0x02,0x20] v_cvt_sr_fp8_f32 v1, -|s2|, v3 diff --git a/llvm/test/MC/AMDGPU/gfx942_err.s b/llvm/test/MC/AMDGPU/gfx942_err.s new file mode 100644 index 0000000000000..fd59a01b34a04 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx942_err.s @@ -0,0 +1,127 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx942 %s 2>&1 | FileCheck --check-prefix=GFX942 --implicit-check-not=error: %s + +v_mac_f32 v0, v1, v2 +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_mac_f32_e64 v5, v1, v2 +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_mac_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_mac_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_mac_f32_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_mad_f32 v0, v1, v2, v3 +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_madak_f32 v0, v1, v2, 0 +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_madmk_f32 v0, v1, 0, v2 +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_mad_legacy_f32 v0, v1, v2, v3 +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_mov_b64 v[2:3], v[4:5] row_shl:1 +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_newbcast + +v_mov_b64 v[2:3], -v[4:5] +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_mov_b64 v[2:3], |v[4:5]| +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_mov_b64 v[2:3], v[4:5] dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_mov_b64_sdwa v[2:3], v[4:5] +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: sdwa variant of this instruction is not supported + +buffer_invl2 +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_load_dword v2, v[2:3], off glc +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +global_load_dword v2, v[2:3], off slc +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +global_load_dword v2, v[2:3], off scc +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +s_load_dword s2, s[2:3], 0x0 sc0 +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +buffer_atomic_swap v5, off, s[8:11], s3 glc +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +buffer_atomic_swap v5, off, s[8:11], s3 slc +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +buffer_wbl2 glc +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +buffer_wbl2 scc +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_dot2_u32_u16 v0, 1, v0, s2 op_sel:[0,1,0,1] op_sel_hi:[0,0,1,1] +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: invalid op_sel operand + +v_cvt_f32_fp8 v1, sext(v3) src0_sel:BYTE_1 +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_cvt_pk_f32_bf8 v[2:3], sext(v3) src0_sel:BYTE_1 +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_cvt_sr_bf8_f32 v1, v2, -v3 +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_cvt_sr_fp8_f32 v1, v2, -v3 +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_cvt_sr_fp8_f32 v1, v2, v3 clamp +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_sr_fp8_f32 v1, v2, v3 mul:2 +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_cvt_pk_fp8_f32 v1, v2, v3 clamp +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cvt_pk_fp8_f32 v1, v2, v3 mul:2 +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +s_getreg_b32 s1, hwreg(HW_REG_FLAT_SCR_LO) +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: invalid hardware register: not supported on this GPU + +s_getreg_b32 s1, hwreg(HW_REG_FLAT_SCR_HI) +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: invalid hardware register: not supported on this GPU + +s_getreg_b32 s1, hwreg(HW_REG_XNACK_MASK) +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: invalid hardware register: not supported on this GPU + +s_getreg_b32 s1, hwreg(HW_REG_HW_ID1) +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: invalid hardware register: not supported on this GPU + +s_getreg_b32 s1, hwreg(HW_REG_HW_ID2) +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: invalid hardware register: not supported on this GPU + +s_getreg_b32 s1, hwreg(HW_REG_POPS_PACKER) +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: invalid hardware register: not supported on this GPU + +ds_ordered_count v5, v1 offset:65535 gds +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +exp pos0 v3, v2, v1, v0 +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_load_dword v[2:3], off lds +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +scratch_load_dword v2, off lds +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/gfx940_err_pos.s b/llvm/test/MC/AMDGPU/gfx942_err_pos.s similarity index 90% rename from llvm/test/MC/AMDGPU/gfx940_err_pos.s rename to llvm/test/MC/AMDGPU/gfx942_err_pos.s index 3823c0eb80277..6ffb2278884ef 100644 --- a/llvm/test/MC/AMDGPU/gfx940_err_pos.s +++ b/llvm/test/MC/AMDGPU/gfx942_err_pos.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck %s --implicit-check-not=error: --strict-whitespace +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx942 %s 2>&1 | FileCheck %s --implicit-check-not=error: --strict-whitespace //============================================================================== // instruction must not use sc0 diff --git a/llvm/test/MC/AMDGPU/gfx940_unsupported.s b/llvm/test/MC/AMDGPU/gfx942_unsupported.s similarity index 85% rename from llvm/test/MC/AMDGPU/gfx940_unsupported.s rename to llvm/test/MC/AMDGPU/gfx942_unsupported.s index 4ef53c7d95239..4470fa4604833 100644 --- a/llvm/test/MC/AMDGPU/gfx940_unsupported.s +++ b/llvm/test/MC/AMDGPU/gfx942_unsupported.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefixes=CHECK --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx942 %s 2>&1 | FileCheck --check-prefixes=CHECK --implicit-check-not=error: %s buffer_store_lds_dword s[4:7], -1 offset:4095 lds // CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_features.s b/llvm/test/MC/AMDGPU/gfx950_asm_features.s index 389b17296c045..5d6adc4095277 100644 --- a/llvm/test/MC/AMDGPU/gfx950_asm_features.s +++ b/llvm/test/MC/AMDGPU/gfx950_asm_features.s @@ -1,5 +1,5 @@ // RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefix=GFX950 --strict-whitespace %s -// xUN: not llvm-mc -triple=amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefixes=NOT-GFX950,GFX940 --implicit-check-not=error: %s +// xUN: not llvm-mc -triple=amdgcn -mcpu=gfx942 %s 2>&1 | FileCheck --check-prefixes=NOT-GFX950,GFX942 --implicit-check-not=error: %s // xUN: not llvm-mc -triple=amdgcn -mcpu=gfx90a %s 2>&1 | FileCheck --check-prefixes=NOT-GFX950,GFX90A --implicit-check-not=error: %s // xUN: not llvm-mc -triple=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=NOT-GFX950,GFX10 --implicit-check-not=error: %s diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_read_tr.s b/llvm/test/MC/AMDGPU/gfx950_asm_read_tr.s index e6606ac8b72d0..b9925d17ac887 100644 --- a/llvm/test/MC/AMDGPU/gfx950_asm_read_tr.s +++ b/llvm/test/MC/AMDGPU/gfx950_asm_read_tr.s @@ -1,42 +1,42 @@ // RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefix=GFX950 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefix=GFX940-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx942 %s 2>&1 | FileCheck --check-prefix=GFX942-ERR --implicit-check-not=error: %s ds_read_b64_tr_b4 v[0:1], v1 -// GFX940-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU // GFX950: encoding: [0x00,0x00,0xc0,0xd9,0x01,0x00,0x00,0x00] ds_read_b64_tr_b4 v[2:3], v3 offset:64 -// GFX940-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU // GFX950: encoding: [0x40,0x00,0xc0,0xd9,0x03,0x00,0x00,0x02] ds_read_b64_tr_b8 v[0:1], v1 -// GFX940-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU // GFX950: encoding: [0x00,0x00,0xc4,0xd9,0x01,0x00,0x00,0x00] ds_read_b64_tr_b8 v[2:3], v3 offset:64 -// GFX940-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU // GFX950: encoding: [0x40,0x00,0xc4,0xd9,0x03,0x00,0x00,0x02] ds_read_b64_tr_b16 v[0:1], v1 -// GFX940-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU // GFX950: encoding: [0x00,0x00,0xc6,0xd9,0x01,0x00,0x00,0x00] ds_read_b64_tr_b16 v[2:3], v3 offset:64 -// GFX940-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU // GFX950: encoding: [0x40,0x00,0xc6,0xd9,0x03,0x00,0x00,0x02] ds_read_b96_tr_b6 v[0:2], v0 -// GFX940-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU // GFX950: encoding: [0x00,0x00,0xc2,0xd9,0x00,0x00,0x00,0x00] ds_read_b96_tr_b6 v[2:4], v2 offset:64 -// GFX940-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU // GFX950: encoding: [0x40,0x00,0xc2,0xd9,0x02,0x00,0x00,0x02] ds_read_b96_tr_b6 v[1:3], v0 -// GFX940-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU // GFX950: encoding: [0x00,0x00,0xc2,0xd9,0x00,0x00,0x00,0x01] ds_read_b96_tr_b6 v[1:3], v2 offset:64 -// GFX940-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: [[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU // GFX950: encoding: [0x40,0x00,0xc2,0xd9,0x02,0x00,0x00,0x01] diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx950_asm_vop1.s index 66dae85ee8e3e..8753119c7826d 100644 --- a/llvm/test/MC/AMDGPU/gfx950_asm_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx950_asm_vop1.s @@ -1,130 +1,130 @@ // RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefix=GFX950 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck -check-prefix=GFX940-ERR --strict-whitespace %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx942 %s 2>&1 | FileCheck -check-prefix=GFX942-ERR --strict-whitespace %s v_prng_b32 v5, v1 // GFX950: v_prng_b32_e32 v5, v1 ; encoding: [0x01,0xb1,0x0a,0x7e] -// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_prng_b32 v5, v255 // GFX950: v_prng_b32_e32 v5, v255 ; encoding: [0xff,0xb1,0x0a,0x7e] -// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_prng_b32 v5, s1 // GFX950: v_prng_b32_e32 v5, s1 ; encoding: [0x01,0xb0,0x0a,0x7e] -// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_prng_b32 v5, s101 // GFX950: v_prng_b32_e32 v5, s101 ; encoding: [0x65,0xb0,0x0a,0x7e] -// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_prng_b32 v5, vcc_lo // GFX950: v_prng_b32_e32 v5, vcc_lo ; encoding: [0x6a,0xb0,0x0a,0x7e] -// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_prng_b32 v5, vcc_hi // GFX950: v_prng_b32_e32 v5, vcc_hi ; encoding: [0x6b,0xb0,0x0a,0x7e] -// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_prng_b32 v5, ttmp15 // GFX950: v_prng_b32_e32 v5, ttmp15 ; encoding: [0x7b,0xb0,0x0a,0x7e] -// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_prng_b32 v5, m0 // GFX950: v_prng_b32_e32 v5, m0 ; encoding: [0x7c,0xb0,0x0a,0x7e] -// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_prng_b32 v5, exec_lo // GFX950: v_prng_b32_e32 v5, exec_lo ; encoding: [0x7e,0xb0,0x0a,0x7e] -// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_prng_b32 v5, exec_hi // GFX950: v_prng_b32_e32 v5, exec_hi ; encoding: [0x7f,0xb0,0x0a,0x7e] -// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_prng_b32 v5, -1 // GFX950: v_prng_b32_e32 v5, -1 ; encoding: [0xc1,0xb0,0x0a,0x7e] -// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_prng_b32 v5, 0.5 // GFX950: v_prng_b32_e32 v5, 0.5 ; encoding: [0xf0,0xb0,0x0a,0x7e] -// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_prng_b32 v5, src_scc // GFX950: v_prng_b32_e32 v5, src_scc ; encoding: [0xfd,0xb0,0x0a,0x7e] -// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_prng_b32 v255, 0xaf123456 // GFX950: v_prng_b32_e32 v255, 0xaf123456 ; encoding: [0xff,0xb0,0xfe,0x7f,0x56,0x34,0x12,0xaf] -// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_cvt_f32_bf16 v5, v1 // GFX950: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xb7,0x0a,0x7e] -// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_cvt_f32_bf16 v5, v127 // GFX950: v_cvt_f32_bf16_e32 v5, v127 ; encoding: [0x7f,0xb7,0x0a,0x7e] -// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_cvt_f32_bf16 v5, s1 // GFX950: v_cvt_f32_bf16_e32 v5, s1 ; encoding: [0x01,0xb6,0x0a,0x7e] -// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_cvt_f32_bf16 v5, vcc_lo // GFX950: v_cvt_f32_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xb6,0x0a,0x7e] -// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_cvt_f32_bf16 v5, vcc_hi // GFX950: v_cvt_f32_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xb6,0x0a,0x7e] -// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_cvt_f32_bf16 v5, ttmp15 // GFX950: v_cvt_f32_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xb6,0x0a,0x7e] -// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_cvt_f32_bf16 v5, m0 // GFX950: v_cvt_f32_bf16_e32 v5, m0 ; encoding: [0x7c,0xb6,0x0a,0x7e] -// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_cvt_f32_bf16 v5, exec_lo // GFX950: v_cvt_f32_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xb6,0x0a,0x7e] -// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_cvt_f32_bf16 v5, exec_hi // GFX950: v_cvt_f32_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xb6,0x0a,0x7e] -// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_cvt_f32_bf16 v5, -1 // GFX950: v_cvt_f32_bf16_e32 v5, -1 ; encoding: [0xc1,0xb6,0x0a,0x7e] -// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_cvt_f32_bf16 v5, 0.5 // GFX950: v_cvt_f32_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xb6,0x0a,0x7e] -// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_cvt_f32_bf16 v5, src_scc // GFX950: v_cvt_f32_bf16_e32 v5, src_scc ; encoding: [0xfd,0xb6,0x0a,0x7e] -// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_cvt_f32_bf16 v127, 0x8000 // GFX950: v_cvt_f32_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xb6,0xfe,0x7e,0x00,0x80,0x00,0x00] -// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_cvt_f32_bf16 v5, -v1 // GFX950: v_cvt_f32_bf16_e64 v5, -v1 ; encoding: [0x05,0x00,0x9b,0xd1,0x01,0x01,0x00,0x20] -// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_cvt_f32_bf16 v5, |v1| // GFX950: v_cvt_f32_bf16_e64 v5, |v1| ; encoding: [0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x00] -// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_cvt_f32_bf16 v5, -|v1| // GFX950: v_cvt_f32_bf16_e64 v5, -|v1| ; encoding: [0x05,0x01,0x9b,0xd1,0x01,0x01,0x00,0x20] -// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_cvt_f32_bf16 v5, v1 clamp mul:2 // GFX950: v_cvt_f32_bf16_e64 v5, v1 clamp mul:2 ; encoding: [0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x08] -// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_cvt_f32_bf16_e64 v5, v1 clamp div:2 // GFX950: v_cvt_f32_bf16_e64 v5, v1 clamp div:2 ; encoding: [0x05,0x80,0x9b,0xd1,0x01,0x01,0x00,0x18] -// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx950_asm_vop3.s index c271d12579f34..faccfbfe11f29 100644 --- a/llvm/test/MC/AMDGPU/gfx950_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx950_asm_vop3.s @@ -1,148 +1,148 @@ // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx906 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX906-ERR %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx940 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX940-ERR %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx942 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX942-ERR %s // RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -show-encoding < %s | FileCheck --check-prefix=GFX950 %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX12-ERR %s v_cvt_pk_bf16_f32 v5, v1, v2 // GFX906-ERR: error: instruction not supported on this GPU -// GFX940-ERR: error: instruction not supported on this GPU +// GFX942-ERR: error: instruction not supported on this GPU // GFX950: v_cvt_pk_bf16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x68,0xd2,0x01,0x05,0x02,0x00] // GFX12-ERR: error: instruction not supported on this GPU v_cvt_pk_bf16_f32 v5, v255, v255 // GFX906-ERR: error: instruction not supported on this GPU -// GFX940-ERR: error: instruction not supported on this GPU +// GFX942-ERR: error: instruction not supported on this GPU // GFX950: v_cvt_pk_bf16_f32 v5, v255, v255 ; encoding: [0x05,0x00,0x68,0xd2,0xff,0xff,0x03,0x00] // GFX12-ERR: error: instruction not supported on this GPU v_cvt_pk_bf16_f32 v5, v1, s2 // GFX906-ERR: error: instruction not supported on this GPU -// GFX940-ERR: error: instruction not supported on this GPU +// GFX942-ERR: error: instruction not supported on this GPU // GFX950: v_cvt_pk_bf16_f32 v5, v1, s2 ; encoding: [0x05,0x00,0x68,0xd2,0x01,0x05,0x00,0x00] // GFX12-ERR: error: instruction not supported on this GPU v_cvt_pk_bf16_f32 v5, m0, 0.5 // GFX906-ERR: error: instruction not supported on this GPU -// GFX940-ERR: error: instruction not supported on this GPU +// GFX942-ERR: error: instruction not supported on this GPU // GFX950: v_cvt_pk_bf16_f32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x68,0xd2,0x7c,0xe0,0x01,0x00] // GFX12-ERR: error: instruction not supported on this GPU v_cvt_pk_bf16_f32 v5, -1, exec_hi // GFX906-ERR: error: instruction not supported on this GPU -// GFX940-ERR: error: instruction not supported on this GPU +// GFX942-ERR: error: instruction not supported on this GPU // GFX950: v_cvt_pk_bf16_f32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x68,0xd2,0xc1,0xfe,0x00,0x00] // GFX12-ERR: error: instruction not supported on this GPU v_cvt_pk_bf16_f32 v5, 0.5, m0 mul:2 // GFX906-ERR: error: instruction not supported on this GPU -// GFX940-ERR: error: instruction not supported on this GPU +// GFX942-ERR: error: instruction not supported on this GPU // GFX950: v_cvt_pk_bf16_f32 v5, 0.5, m0 mul:2 ; encoding: [0x05,0x00,0x68,0xd2,0xf0,0xf8,0x00,0x08] // GFX12-ERR: error: instruction not supported on this GPU v_bitop3_b32 v5, v1, v2, s3 // GFX906-ERR: error: instruction not supported on this GPU -// GFX940-ERR: error: instruction not supported on this GPU +// GFX942-ERR: error: instruction not supported on this GPU // GFX950: v_bitop3_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x34,0xd2,0x01,0x05,0x0e,0x00] // GFX12-ERR: error: instruction not supported on this GPU v_bitop3_b32 v5, v1, v2, s3 bitop3:161 // GFX906-ERR: error: instruction not supported on this GPU -// GFX940-ERR: error: instruction not supported on this GPU +// GFX942-ERR: error: instruction not supported on this GPU // GFX950: v_bitop3_b32 v5, v1, v2, s3 bitop3:0xa1 ; encoding: [0x05,0x04,0x34,0xd2,0x01,0x05,0x0e,0x30] // GFX12-ERR: error: instruction not supported on this GPU v_bitop3_b32 v5, m0, 0.5, m0 bitop3:5 // GFX906-ERR: error: instruction not supported on this GPU -// GFX940-ERR: error: instruction not supported on this GPU +// GFX942-ERR: error: instruction not supported on this GPU // GFX950: v_bitop3_b32 v5, m0, 0.5, m0 bitop3:5 ; encoding: [0x05,0x00,0x34,0xd2,0x7c,0xe0,0xf1,0xa1] // GFX12-ERR: error: instruction not supported on this GPU v_bitop3_b32 v5, 0.5, m0, 0.5 bitop3:101 // GFX906-ERR: error: instruction not supported on this GPU -// GFX940-ERR: error: instruction not supported on this GPU +// GFX942-ERR: error: instruction not supported on this GPU // GFX950: v_bitop3_b32 v5, 0.5, m0, 0.5 bitop3:0x65 ; encoding: [0x05,0x04,0x34,0xd2,0xf0,0xf8,0xc0,0xab] // GFX12-ERR: error: instruction not supported on this GPU v_bitop3_b16 v5, v1, v2, s3 // GFX906-ERR: error: instruction not supported on this GPU -// GFX940-ERR: error: instruction not supported on this GPU +// GFX942-ERR: error: instruction not supported on this GPU // GFX950: v_bitop3_b16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x33,0xd2,0x01,0x05,0x0e,0x00] // GFX12-ERR: error: instruction not supported on this GPU v_bitop3_b16 v5, v1, v2, s3 bitop3:161 // GFX906-ERR: error: instruction not supported on this GPU -// GFX940-ERR: error: instruction not supported on this GPU +// GFX942-ERR: error: instruction not supported on this GPU // GFX950: v_bitop3_b16 v5, v1, v2, s3 bitop3:0xa1 ; encoding: [0x05,0x04,0x33,0xd2,0x01,0x05,0x0e,0x30] // GFX12-ERR: error: instruction not supported on this GPU v_ashr_pk_i8_i32 v2, s4, v7, v8 // GFX906-ERR: error: instruction not supported on this GPU -// GFX940-ERR: error: instruction not supported on this GPU +// GFX942-ERR: error: instruction not supported on this GPU // GFX950: v_ashr_pk_i8_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x65,0xd2,0x04,0x0e,0x22,0x04] // GFX12-ERR: error: instruction not supported on this GPU v_ashr_pk_i8_i32 v2, v4, 0, 1 // GFX906-ERR: error: instruction not supported on this GPU -// GFX940-ERR: error: instruction not supported on this GPU +// GFX942-ERR: error: instruction not supported on this GPU // GFX950: v_ashr_pk_i8_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x65,0xd2,0x04,0x01,0x05,0x02] // GFX12-ERR: error: instruction not supported on this GPU v_ashr_pk_i8_i32 v2, v4, 3, s2 // GFX906-ERR: error: instruction not supported on this GPU -// GFX940-ERR: error: instruction not supported on this GPU +// GFX942-ERR: error: instruction not supported on this GPU // GFX950: v_ashr_pk_i8_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x65,0xd2,0x04,0x07,0x09,0x00] // GFX12-ERR: error: instruction not supported on this GPU v_ashr_pk_i8_i32 v2, s4, 4, v2 // GFX906-ERR: error: instruction not supported on this GPU -// GFX940-ERR: error: instruction not supported on this GPU +// GFX942-ERR: error: instruction not supported on this GPU // GFX950: v_ashr_pk_i8_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x65,0xd2,0x04,0x08,0x09,0x04] // GFX12-ERR: error: instruction not supported on this GPU v_ashr_pk_i8_i32 v2, v4, v7, 0.5 // GFX906-ERR: error: instruction not supported on this GPU -// GFX940-ERR: error: instruction not supported on this GPU +// GFX942-ERR: error: instruction not supported on this GPU // GFX950: v_ashr_pk_i8_i32 v2, v4, v7, 0.5 ; encoding: [0x02,0x00,0x65,0xd2,0x04,0x0f,0xc2,0x03] // GFX12-ERR: error: instruction not supported on this GPU v_ashr_pk_i8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] // GFX906-ERR: error: instruction not supported on this GPU -// GFX940-ERR: error: instruction not supported on this GPU +// GFX942-ERR: error: instruction not supported on this GPU // GFX950: v_ashr_pk_i8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x65,0xd2,0x02,0x07,0x12,0x04] // GFX12-ERR: error: instruction not supported on this GPU v_ashr_pk_u8_i32 v2, s4, v7, v8 // GFX906-ERR: error: instruction not supported on this GPU -// GFX940-ERR: error: instruction not supported on this GPU +// GFX942-ERR: error: instruction not supported on this GPU // GFX950: v_ashr_pk_u8_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x66,0xd2,0x04,0x0e,0x22,0x04] // GFX12-ERR: error: instruction not supported on this GPU v_ashr_pk_u8_i32 v2, v4, 0, 1 // GFX906-ERR: error: instruction not supported on this GPU -// GFX940-ERR: error: instruction not supported on this GPU +// GFX942-ERR: error: instruction not supported on this GPU // GFX950: v_ashr_pk_u8_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x66,0xd2,0x04,0x01,0x05,0x02] // GFX12-ERR: error: instruction not supported on this GPU v_ashr_pk_u8_i32 v2, v4, 3, s2 // GFX906-ERR: error: instruction not supported on this GPU -// GFX940-ERR: error: instruction not supported on this GPU +// GFX942-ERR: error: instruction not supported on this GPU // GFX950: v_ashr_pk_u8_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x66,0xd2,0x04,0x07,0x09,0x00] // GFX12-ERR: error: instruction not supported on this GPU v_ashr_pk_u8_i32 v2, s4, 4, v2 // GFX906-ERR: error: instruction not supported on this GPU -// GFX940-ERR: error: instruction not supported on this GPU +// GFX942-ERR: error: instruction not supported on this GPU // GFX950: v_ashr_pk_u8_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x66,0xd2,0x04,0x08,0x09,0x04] // GFX12-ERR: error: instruction not supported on this GPU v_ashr_pk_u8_i32 v2, v4, v7, -2.0 // GFX906-ERR: error: instruction not supported on this GPU -// GFX940-ERR: error: instruction not supported on this GPU +// GFX942-ERR: error: instruction not supported on this GPU // GFX950: v_ashr_pk_u8_i32 v2, v4, v7, -2.0 ; encoding: [0x02,0x00,0x66,0xd2,0x04,0x0f,0xd6,0x03] // GFX12-ERR: error: instruction not supported on this GPU v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] // GFX906-ERR: error: instruction not supported on this GPU -// GFX940-ERR: error: instruction not supported on this GPU +// GFX942-ERR: error: instruction not supported on this GPU // GFX950: v_ashr_pk_u8_i32 v1, v2, v3, v4 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x66,0xd2,0x02,0x07,0x12,0x04] // GFX12-ERR: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/mai-err-gfx940.s b/llvm/test/MC/AMDGPU/mai-err-gfx942.s similarity index 50% rename from llvm/test/MC/AMDGPU/mai-err-gfx940.s rename to llvm/test/MC/AMDGPU/mai-err-gfx942.s index 810788555a71e..22844ca90808d 100644 --- a/llvm/test/MC/AMDGPU/mai-err-gfx940.s +++ b/llvm/test/MC/AMDGPU/mai-err-gfx942.s @@ -1,46 +1,46 @@ -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck -check-prefix=GFX940 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx942 %s 2>&1 | FileCheck -check-prefix=GFX942 %s v_mfma_f32_32x32x2bf16 a[0:31], v0, v1, 0 -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_16x16x2bf16 a[0:15], v0, v1, 0 -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_4x4x2bf16 a[0:3], v0, v1, 0 -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_32x32x4bf16 a[0:15], v0, v1, 0 -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_mfma_f32_16x16x8bf16 a[0:3], v0, v1, 0 -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31] neg:[1,0,0] -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: invalid modifier: neg is not supported +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: invalid modifier: neg is not supported v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[0:31] neg:[1,0,0] -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: invalid modifier: neg is not supported +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: invalid modifier: neg is not supported v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7] blgp:7 -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: invalid modifier: blgp is not supported +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: invalid modifier: blgp is not supported v_mfma_f64_16x16x4f64 v[0:7], v[0:1], v[2:3], v[0:7] blgp:7 -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: invalid modifier: blgp is not supported +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: invalid modifier: blgp is not supported v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] blgp:7 -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: invalid modifier: blgp is not supported +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: invalid modifier: blgp is not supported v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], a[2:3], v[2:3] blgp:7 -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: invalid modifier: blgp is not supported +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: invalid modifier: blgp is not supported v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] blgp:7 -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: invalid modifier: blgp is not supported +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: invalid modifier: blgp is not supported v_mfma_i32_32x32x8i8 a[0:15], v0, v1, a[0:15] -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_i32_16x16x16i8 a[0:3], v0, v1, a[0:3] -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[2:9] -// GFX940: :[[@LINE-1]]:{{[0-9]+}}: error: source 2 operand must not partially overlap with dst +// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: source 2 operand must not partially overlap with dst diff --git a/llvm/test/MC/AMDGPU/mai-gfx940.s b/llvm/test/MC/AMDGPU/mai-gfx942.s similarity index 74% rename from llvm/test/MC/AMDGPU/mai-gfx940.s rename to llvm/test/MC/AMDGPU/mai-gfx942.s index f6343ad26cfa4..aa27a566dc512 100644 --- a/llvm/test/MC/AMDGPU/mai-gfx940.s +++ b/llvm/test/MC/AMDGPU/mai-gfx942.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx940 -show-encoding %s | FileCheck -check-prefix=GFX940 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx942 -show-encoding %s | FileCheck -check-prefix=GFX942 %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx90a %s 2>&1 | FileCheck -check-prefix=GFX90A %s //===----------------------------------------------------------------------===// @@ -6,562 +6,562 @@ //===----------------------------------------------------------------------===// v_accvgpr_write_b32 a10, s20 -// GFX940: v_accvgpr_write_b32 a10, s20 ; encoding: [0x0a,0x40,0xd9,0xd3,0x14,0x00,0x00,0x18] +// GFX942: v_accvgpr_write_b32 a10, s20 ; encoding: [0x0a,0x40,0xd9,0xd3,0x14,0x00,0x00,0x18] //===----------------------------------------------------------------------===// // MFMA opcodes. //===----------------------------------------------------------------------===// v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] -// GFX940: v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] ; encoding: [0x00,0x80,0xef,0xd3,0x00,0x05,0x0a,0x14] +// GFX942: v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] ; encoding: [0x00,0x80,0xef,0xd3,0x00,0x05,0x0a,0x14] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], a[2:3], v[2:3] -// GFX940: v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], a[2:3], v[2:3] ; encoding: [0x00,0x00,0xef,0xd3,0x00,0x05,0x0a,0x14] +// GFX942: v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], a[2:3], v[2:3] ; encoding: [0x00,0x00,0xef,0xd3,0x00,0x05,0x0a,0x14] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f64_4x4x4f64 a[0:1], v[0:1], a[2:3], a[2:3] -// GFX940: v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] ; encoding: [0x00,0x80,0xef,0xd3,0x00,0x05,0x0a,0x14] +// GFX942: v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] ; encoding: [0x00,0x80,0xef,0xd3,0x00,0x05,0x0a,0x14] v_mfma_f64_4x4x4f64 v[0:1], v[0:1], a[2:3], v[2:3] -// GFX940: v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], a[2:3], v[2:3] ; encoding: [0x00,0x00,0xef,0xd3,0x00,0x05,0x0a,0x14] +// GFX942: v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], a[2:3], v[2:3] ; encoding: [0x00,0x00,0xef,0xd3,0x00,0x05,0x0a,0x14] v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] neg:[1,0,0] -// GFX940: v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] neg:[1,0,0] ; encoding: [0x00,0x80,0xef,0xd3,0x00,0x05,0x0a,0x34] +// GFX942: v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] neg:[1,0,0] ; encoding: [0x00,0x80,0xef,0xd3,0x00,0x05,0x0a,0x34] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] neg:[0,1,0] -// GFX940: v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] neg:[0,1,0] ; encoding: [0x00,0x80,0xef,0xd3,0x00,0x05,0x0a,0x54] +// GFX942: v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] neg:[0,1,0] ; encoding: [0x00,0x80,0xef,0xd3,0x00,0x05,0x0a,0x54] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] neg:[0,0,1] -// GFX940: v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] neg:[0,0,1] ; encoding: [0x00,0x80,0xef,0xd3,0x00,0x05,0x0a,0x94] +// GFX942: v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] neg:[0,0,1] ; encoding: [0x00,0x80,0xef,0xd3,0x00,0x05,0x0a,0x94] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], a[2:3], v[2:3] neg:[1,1,1] -// GFX940: v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], a[2:3], v[2:3] neg:[1,1,1] ; encoding: [0x00,0x00,0xef,0xd3,0x00,0x05,0x0a,0xf4] +// GFX942: v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], a[2:3], v[2:3] neg:[1,1,1] ; encoding: [0x00,0x00,0xef,0xd3,0x00,0x05,0x0a,0xf4] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f64_4x4x4f64 a[0:1], v[0:1], a[2:3], a[2:3] neg:[1,0,0] -// GFX940: v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] neg:[1,0,0] ; encoding: [0x00,0x80,0xef,0xd3,0x00,0x05,0x0a,0x34] +// GFX942: v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] neg:[1,0,0] ; encoding: [0x00,0x80,0xef,0xd3,0x00,0x05,0x0a,0x34] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: invalid modifier: neg is not supported v_mfma_f64_4x4x4f64 v[0:1], v[0:1], a[2:3], v[2:3] neg:[1,0,0] -// GFX940: v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], a[2:3], v[2:3] neg:[1,0,0] ; encoding: [0x00,0x00,0xef,0xd3,0x00,0x05,0x0a,0x34] +// GFX942: v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], a[2:3], v[2:3] neg:[1,0,0] ; encoding: [0x00,0x00,0xef,0xd3,0x00,0x05,0x0a,0x34] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: invalid modifier: neg is not supported v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] -// GFX940: v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] ; encoding: [0x00,0x80,0xee,0xd3,0x00,0x05,0x02,0x04] +// GFX942: v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] ; encoding: [0x00,0x80,0xee,0xd3,0x00,0x05,0x02,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7] -// GFX940: v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7] ; encoding: [0x00,0x00,0xee,0xd3,0x00,0x05,0x02,0x04] +// GFX942: v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7] ; encoding: [0x00,0x00,0xee,0xd3,0x00,0x05,0x02,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[0:7] -// GFX940: v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] ; encoding: [0x00,0x80,0xee,0xd3,0x00,0x05,0x02,0x04] +// GFX942: v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] ; encoding: [0x00,0x80,0xee,0xd3,0x00,0x05,0x02,0x04] v_mfma_f64_16x16x4f64 v[0:7], v[0:1], v[2:3], v[0:7] -// GFX940: v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7] ; encoding: [0x00,0x00,0xee,0xd3,0x00,0x05,0x02,0x04] +// GFX942: v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7] ; encoding: [0x00,0x00,0xee,0xd3,0x00,0x05,0x02,0x04] v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] neg:[1,1,1] -// GFX940: v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] neg:[1,1,1] ; encoding: [0x00,0x80,0xee,0xd3,0x00,0x05,0x02,0xe4] +// GFX942: v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] neg:[1,1,1] ; encoding: [0x00,0x80,0xee,0xd3,0x00,0x05,0x02,0xe4] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7] neg:[1,1,1] -// GFX940: v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7] neg:[1,1,1] ; encoding: [0x00,0x00,0xee,0xd3,0x00,0x05,0x02,0xe4] +// GFX942: v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7] neg:[1,1,1] ; encoding: [0x00,0x00,0xee,0xd3,0x00,0x05,0x02,0xe4] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[0:7] neg:[1,0,0] -// GFX940: v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] neg:[1,0,0] ; encoding: [0x00,0x80,0xee,0xd3,0x00,0x05,0x02,0x24] +// GFX942: v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] neg:[1,0,0] ; encoding: [0x00,0x80,0xee,0xd3,0x00,0x05,0x02,0x24] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: invalid modifier: neg is not supported v_mfma_f64_16x16x4f64 v[0:7], v[0:1], v[2:3], v[0:7] neg:[1,0,0] -// GFX940: v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7] neg:[1,0,0] ; encoding: [0x00,0x00,0xee,0xd3,0x00,0x05,0x02,0x24] +// GFX942: v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7] neg:[1,0,0] ; encoding: [0x00,0x00,0xee,0xd3,0x00,0x05,0x02,0x24] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: invalid modifier: neg is not supported v_mfma_f32_16x16x1_4b_f32 a[0:15], v0, v1, a[18:33] -// GFX940: v_mfma_f32_16x16x1_4b_f32 a[0:15], v0, v1, a[18:33] ; encoding: [0x00,0x80,0xc1,0xd3,0x00,0x03,0x4a,0x04] +// GFX942: v_mfma_f32_16x16x1_4b_f32 a[0:15], v0, v1, a[18:33] ; encoding: [0x00,0x80,0xc1,0xd3,0x00,0x03,0x4a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_16x16x1_4b_f32 v[0:15], v0, v1, v[18:33] -// GFX940: v_mfma_f32_16x16x1_4b_f32 v[0:15], v0, v1, v[18:33] ; encoding: [0x00,0x00,0xc1,0xd3,0x00,0x03,0x4a,0x04] +// GFX942: v_mfma_f32_16x16x1_4b_f32 v[0:15], v0, v1, v[18:33] ; encoding: [0x00,0x00,0xc1,0xd3,0x00,0x03,0x4a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_16x16x1f32 a[0:15], v0, v1, a[18:33] -// GFX940: v_mfma_f32_16x16x1_4b_f32 a[0:15], v0, v1, a[18:33] ; encoding: [0x00,0x80,0xc1,0xd3,0x00,0x03,0x4a,0x04] +// GFX942: v_mfma_f32_16x16x1_4b_f32 a[0:15], v0, v1, a[18:33] ; encoding: [0x00,0x80,0xc1,0xd3,0x00,0x03,0x4a,0x04] v_mfma_f32_16x16x1f32 v[0:15], v0, v1, v[18:33] -// GFX940: v_mfma_f32_16x16x1_4b_f32 v[0:15], v0, v1, v[18:33] ; encoding: [0x00,0x00,0xc1,0xd3,0x00,0x03,0x4a,0x04] +// GFX942: v_mfma_f32_16x16x1_4b_f32 v[0:15], v0, v1, v[18:33] ; encoding: [0x00,0x00,0xc1,0xd3,0x00,0x03,0x4a,0x04] v_mfma_f32_4x4x1_16b_f32 a[0:3], v0, v1, a[2:5] -// GFX940: v_mfma_f32_4x4x1_16b_f32 a[0:3], v0, v1, a[2:5] ; encoding: [0x00,0x80,0xc2,0xd3,0x00,0x03,0x0a,0x04] +// GFX942: v_mfma_f32_4x4x1_16b_f32 a[0:3], v0, v1, a[2:5] ; encoding: [0x00,0x80,0xc2,0xd3,0x00,0x03,0x0a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_4x4x1_16b_f32 v[0:3], v0, v1, v[2:5] -// GFX940: v_mfma_f32_4x4x1_16b_f32 v[0:3], v0, v1, v[2:5] ; encoding: [0x00,0x00,0xc2,0xd3,0x00,0x03,0x0a,0x04] +// GFX942: v_mfma_f32_4x4x1_16b_f32 v[0:3], v0, v1, v[2:5] ; encoding: [0x00,0x00,0xc2,0xd3,0x00,0x03,0x0a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_4x4x1f32 a[0:3], v0, v1, a[2:5] -// GFX940: v_mfma_f32_4x4x1_16b_f32 a[0:3], v0, v1, a[2:5] ; encoding: [0x00,0x80,0xc2,0xd3,0x00,0x03,0x0a,0x04] +// GFX942: v_mfma_f32_4x4x1_16b_f32 a[0:3], v0, v1, a[2:5] ; encoding: [0x00,0x80,0xc2,0xd3,0x00,0x03,0x0a,0x04] v_mfma_f32_4x4x1f32 v[0:3], v0, v1, v[2:5] -// GFX940: v_mfma_f32_4x4x1_16b_f32 v[0:3], v0, v1, v[2:5] ; encoding: [0x00,0x00,0xc2,0xd3,0x00,0x03,0x0a,0x04] +// GFX942: v_mfma_f32_4x4x1_16b_f32 v[0:3], v0, v1, v[2:5] ; encoding: [0x00,0x00,0xc2,0xd3,0x00,0x03,0x0a,0x04] v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33] -// GFX940: v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33] ; encoding: [0x00,0x80,0xc4,0xd3,0x00,0x03,0x4a,0x04] +// GFX942: v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33] ; encoding: [0x00,0x80,0xc4,0xd3,0x00,0x03,0x4a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33] -// GFX940: v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33] ; encoding: [0x00,0x00,0xc4,0xd3,0x00,0x03,0x4a,0x04] +// GFX942: v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33] ; encoding: [0x00,0x00,0xc4,0xd3,0x00,0x03,0x4a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_32x32x2f32 a[0:15], v0, v1, a[18:33] -// GFX940: v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33] ; encoding: [0x00,0x80,0xc4,0xd3,0x00,0x03,0x4a,0x04] +// GFX942: v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33] ; encoding: [0x00,0x80,0xc4,0xd3,0x00,0x03,0x4a,0x04] v_mfma_f32_32x32x2f32 v[0:15], v0, v1, v[18:33] -// GFX940: v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33] ; encoding: [0x00,0x00,0xc4,0xd3,0x00,0x03,0x4a,0x04] +// GFX942: v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33] ; encoding: [0x00,0x00,0xc4,0xd3,0x00,0x03,0x4a,0x04] v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5] -// GFX940: v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5] ; encoding: [0x00,0x80,0xc5,0xd3,0x00,0x03,0x0a,0x04] +// GFX942: v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5] ; encoding: [0x00,0x80,0xc5,0xd3,0x00,0x03,0x0a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5] -// GFX940: v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5] ; encoding: [0x00,0x00,0xc5,0xd3,0x00,0x03,0x0a,0x04] +// GFX942: v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5] ; encoding: [0x00,0x00,0xc5,0xd3,0x00,0x03,0x0a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_16x16x4f32 a[0:3], v0, v1, a[2:5] -// GFX940: v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5] ; encoding: [0x00,0x80,0xc5,0xd3,0x00,0x03,0x0a,0x04] +// GFX942: v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5] ; encoding: [0x00,0x80,0xc5,0xd3,0x00,0x03,0x0a,0x04] v_mfma_f32_16x16x4f32 v[0:3], v0, v1, v[2:5] -// GFX940: v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5] ; encoding: [0x00,0x00,0xc5,0xd3,0x00,0x03,0x0a,0x04] +// GFX942: v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5] ; encoding: [0x00,0x00,0xc5,0xd3,0x00,0x03,0x0a,0x04] v_mfma_f32_32x32x4_2b_f16 a[0:31], v[0:1], v[2:3], a[34:65] -// GFX940: v_mfma_f32_32x32x4_2b_f16 a[0:31], v[0:1], v[2:3], a[34:65] ; encoding: [0x00,0x80,0xc8,0xd3,0x00,0x05,0x8a,0x04] +// GFX942: v_mfma_f32_32x32x4_2b_f16 a[0:31], v[0:1], v[2:3], a[34:65] ; encoding: [0x00,0x80,0xc8,0xd3,0x00,0x05,0x8a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_32x32x4_2b_f16 v[0:31], v[0:1], v[2:3], v[34:65] -// GFX940: v_mfma_f32_32x32x4_2b_f16 v[0:31], v[0:1], v[2:3], v[34:65] ; encoding: [0x00,0x00,0xc8,0xd3,0x00,0x05,0x8a,0x04] +// GFX942: v_mfma_f32_32x32x4_2b_f16 v[0:31], v[0:1], v[2:3], v[34:65] ; encoding: [0x00,0x00,0xc8,0xd3,0x00,0x05,0x8a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_32x32x4f16 a[0:31], v[0:1], v[2:3], a[34:65] -// GFX940: v_mfma_f32_32x32x4_2b_f16 a[0:31], v[0:1], v[2:3], a[34:65] ; encoding: [0x00,0x80,0xc8,0xd3,0x00,0x05,0x8a,0x04] +// GFX942: v_mfma_f32_32x32x4_2b_f16 a[0:31], v[0:1], v[2:3], a[34:65] ; encoding: [0x00,0x80,0xc8,0xd3,0x00,0x05,0x8a,0x04] v_mfma_f32_32x32x4f16 v[0:31], v[0:1], v[2:3], v[34:65] -// GFX940: v_mfma_f32_32x32x4_2b_f16 v[0:31], v[0:1], v[2:3], v[34:65] ; encoding: [0x00,0x00,0xc8,0xd3,0x00,0x05,0x8a,0x04] +// GFX942: v_mfma_f32_32x32x4_2b_f16 v[0:31], v[0:1], v[2:3], v[34:65] ; encoding: [0x00,0x00,0xc8,0xd3,0x00,0x05,0x8a,0x04] v_mfma_f32_16x16x4_4b_f16 a[0:15], v[0:1], v[2:3], a[18:33] -// GFX940: v_mfma_f32_16x16x4_4b_f16 a[0:15], v[0:1], v[2:3], a[18:33] ; encoding: [0x00,0x80,0xc9,0xd3,0x00,0x05,0x4a,0x04] +// GFX942: v_mfma_f32_16x16x4_4b_f16 a[0:15], v[0:1], v[2:3], a[18:33] ; encoding: [0x00,0x80,0xc9,0xd3,0x00,0x05,0x4a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_16x16x4_4b_f16 v[0:15], v[0:1], v[2:3], v[18:33] -// GFX940: v_mfma_f32_16x16x4_4b_f16 v[0:15], v[0:1], v[2:3], v[18:33] ; encoding: [0x00,0x00,0xc9,0xd3,0x00,0x05,0x4a,0x04] +// GFX942: v_mfma_f32_16x16x4_4b_f16 v[0:15], v[0:1], v[2:3], v[18:33] ; encoding: [0x00,0x00,0xc9,0xd3,0x00,0x05,0x4a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_16x16x4f16 a[0:15], v[0:1], v[2:3], a[18:33] -// GFX940: v_mfma_f32_16x16x4_4b_f16 a[0:15], v[0:1], v[2:3], a[18:33] ; encoding: [0x00,0x80,0xc9,0xd3,0x00,0x05,0x4a,0x04] +// GFX942: v_mfma_f32_16x16x4_4b_f16 a[0:15], v[0:1], v[2:3], a[18:33] ; encoding: [0x00,0x80,0xc9,0xd3,0x00,0x05,0x4a,0x04] v_mfma_f32_16x16x4f16 v[0:15], v[0:1], v[2:3], v[18:33] -// GFX940: v_mfma_f32_16x16x4_4b_f16 v[0:15], v[0:1], v[2:3], v[18:33] ; encoding: [0x00,0x00,0xc9,0xd3,0x00,0x05,0x4a,0x04] +// GFX942: v_mfma_f32_16x16x4_4b_f16 v[0:15], v[0:1], v[2:3], v[18:33] ; encoding: [0x00,0x00,0xc9,0xd3,0x00,0x05,0x4a,0x04] v_mfma_f32_4x4x4_16b_f16 a[0:3], v[0:1], v[2:3], a[2:5] -// GFX940: v_mfma_f32_4x4x4_16b_f16 a[0:3], v[0:1], v[2:3], a[2:5] ; encoding: [0x00,0x80,0xca,0xd3,0x00,0x05,0x0a,0x04] +// GFX942: v_mfma_f32_4x4x4_16b_f16 a[0:3], v[0:1], v[2:3], a[2:5] ; encoding: [0x00,0x80,0xca,0xd3,0x00,0x05,0x0a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_4x4x4_16b_f16 v[0:3], v[0:1], v[2:3], v[2:5] -// GFX940: v_mfma_f32_4x4x4_16b_f16 v[0:3], v[0:1], v[2:3], v[2:5] ; encoding: [0x00,0x00,0xca,0xd3,0x00,0x05,0x0a,0x04] +// GFX942: v_mfma_f32_4x4x4_16b_f16 v[0:3], v[0:1], v[2:3], v[2:5] ; encoding: [0x00,0x00,0xca,0xd3,0x00,0x05,0x0a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_4x4x4f16 a[0:3], v[0:1], v[2:3], a[2:5] -// GFX940: v_mfma_f32_4x4x4_16b_f16 a[0:3], v[0:1], v[2:3], a[2:5] ; encoding: [0x00,0x80,0xca,0xd3,0x00,0x05,0x0a,0x04] +// GFX942: v_mfma_f32_4x4x4_16b_f16 a[0:3], v[0:1], v[2:3], a[2:5] ; encoding: [0x00,0x80,0xca,0xd3,0x00,0x05,0x0a,0x04] v_mfma_f32_4x4x4f16 v[0:3], v[0:1], v[2:3], v[2:5] -// GFX940: v_mfma_f32_4x4x4_16b_f16 v[0:3], v[0:1], v[2:3], v[2:5] ; encoding: [0x00,0x00,0xca,0xd3,0x00,0x05,0x0a,0x04] +// GFX942: v_mfma_f32_4x4x4_16b_f16 v[0:3], v[0:1], v[2:3], v[2:5] ; encoding: [0x00,0x00,0xca,0xd3,0x00,0x05,0x0a,0x04] v_mfma_f32_32x32x8_f16 a[0:15], v[0:1], v[2:3], a[18:33] -// GFX940: v_mfma_f32_32x32x8_f16 a[0:15], v[0:1], v[2:3], a[18:33] ; encoding: [0x00,0x80,0xcc,0xd3,0x00,0x05,0x4a,0x04] +// GFX942: v_mfma_f32_32x32x8_f16 a[0:15], v[0:1], v[2:3], a[18:33] ; encoding: [0x00,0x80,0xcc,0xd3,0x00,0x05,0x4a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_32x32x8_f16 v[0:15], v[0:1], v[2:3], v[18:33] -// GFX940: v_mfma_f32_32x32x8_f16 v[0:15], v[0:1], v[2:3], v[18:33] ; encoding: [0x00,0x00,0xcc,0xd3,0x00,0x05,0x4a,0x04] +// GFX942: v_mfma_f32_32x32x8_f16 v[0:15], v[0:1], v[2:3], v[18:33] ; encoding: [0x00,0x00,0xcc,0xd3,0x00,0x05,0x4a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[2:3], a[18:33] -// GFX940: v_mfma_f32_32x32x8_f16 a[0:15], v[0:1], v[2:3], a[18:33] ; encoding: [0x00,0x80,0xcc,0xd3,0x00,0x05,0x4a,0x04] +// GFX942: v_mfma_f32_32x32x8_f16 a[0:15], v[0:1], v[2:3], a[18:33] ; encoding: [0x00,0x80,0xcc,0xd3,0x00,0x05,0x4a,0x04] v_mfma_f32_32x32x8f16 v[0:15], v[0:1], v[2:3], v[18:33] -// GFX940: v_mfma_f32_32x32x8_f16 v[0:15], v[0:1], v[2:3], v[18:33] ; encoding: [0x00,0x00,0xcc,0xd3,0x00,0x05,0x4a,0x04] +// GFX942: v_mfma_f32_32x32x8_f16 v[0:15], v[0:1], v[2:3], v[18:33] ; encoding: [0x00,0x00,0xcc,0xd3,0x00,0x05,0x4a,0x04] v_mfma_f32_16x16x16_f16 a[0:3], v[0:1], v[2:3], a[2:5] -// GFX940: v_mfma_f32_16x16x16_f16 a[0:3], v[0:1], v[2:3], a[2:5] ; encoding: [0x00,0x80,0xcd,0xd3,0x00,0x05,0x0a,0x04] +// GFX942: v_mfma_f32_16x16x16_f16 a[0:3], v[0:1], v[2:3], a[2:5] ; encoding: [0x00,0x80,0xcd,0xd3,0x00,0x05,0x0a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_16x16x16_f16 v[0:3], v[0:1], v[2:3], v[2:5] -// GFX940: v_mfma_f32_16x16x16_f16 v[0:3], v[0:1], v[2:3], v[2:5] ; encoding: [0x00,0x00,0xcd,0xd3,0x00,0x05,0x0a,0x04] +// GFX942: v_mfma_f32_16x16x16_f16 v[0:3], v[0:1], v[2:3], v[2:5] ; encoding: [0x00,0x00,0xcd,0xd3,0x00,0x05,0x0a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_16x16x16f16 a[0:3], v[0:1], v[2:3], a[2:5] -// GFX940: v_mfma_f32_16x16x16_f16 a[0:3], v[0:1], v[2:3], a[2:5] ; encoding: [0x00,0x80,0xcd,0xd3,0x00,0x05,0x0a,0x04] +// GFX942: v_mfma_f32_16x16x16_f16 a[0:3], v[0:1], v[2:3], a[2:5] ; encoding: [0x00,0x80,0xcd,0xd3,0x00,0x05,0x0a,0x04] v_mfma_f32_16x16x16f16 v[0:3], v[0:1], v[2:3], v[2:5] -// GFX940: v_mfma_f32_16x16x16_f16 v[0:3], v[0:1], v[2:3], v[2:5] ; encoding: [0x00,0x00,0xcd,0xd3,0x00,0x05,0x0a,0x04] +// GFX942: v_mfma_f32_16x16x16_f16 v[0:3], v[0:1], v[2:3], v[2:5] ; encoding: [0x00,0x00,0xcd,0xd3,0x00,0x05,0x0a,0x04] v_mfma_i32_32x32x4_2b_i8 a[0:31], v0, v1, a[34:65] -// GFX940: v_mfma_i32_32x32x4_2b_i8 a[0:31], v0, v1, a[34:65] ; encoding: [0x00,0x80,0xd0,0xd3,0x00,0x03,0x8a,0x04] +// GFX942: v_mfma_i32_32x32x4_2b_i8 a[0:31], v0, v1, a[34:65] ; encoding: [0x00,0x80,0xd0,0xd3,0x00,0x03,0x8a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_i32_32x32x4_2b_i8 v[0:31], v0, a1, v[34:65] -// GFX940: v_mfma_i32_32x32x4_2b_i8 v[0:31], v0, a1, v[34:65] ; encoding: [0x00,0x00,0xd0,0xd3,0x00,0x03,0x8a,0x14] +// GFX942: v_mfma_i32_32x32x4_2b_i8 v[0:31], v0, a1, v[34:65] ; encoding: [0x00,0x00,0xd0,0xd3,0x00,0x03,0x8a,0x14] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_i32_32x32x4i8 a[0:31], v0, v1, a[34:65] -// GFX940: v_mfma_i32_32x32x4_2b_i8 a[0:31], v0, v1, a[34:65] ; encoding: [0x00,0x80,0xd0,0xd3,0x00,0x03,0x8a,0x04] +// GFX942: v_mfma_i32_32x32x4_2b_i8 a[0:31], v0, v1, a[34:65] ; encoding: [0x00,0x80,0xd0,0xd3,0x00,0x03,0x8a,0x04] v_mfma_i32_32x32x4i8 v[0:31], v0, a1, v[34:65] -// GFX940: v_mfma_i32_32x32x4_2b_i8 v[0:31], v0, a1, v[34:65] ; encoding: [0x00,0x00,0xd0,0xd3,0x00,0x03,0x8a,0x14] +// GFX942: v_mfma_i32_32x32x4_2b_i8 v[0:31], v0, a1, v[34:65] ; encoding: [0x00,0x00,0xd0,0xd3,0x00,0x03,0x8a,0x14] v_mfma_i32_16x16x4_4b_i8 a[0:15], v0, v1, a[18:33] -// GFX940: v_mfma_i32_16x16x4_4b_i8 a[0:15], v0, v1, a[18:33] ; encoding: [0x00,0x80,0xd1,0xd3,0x00,0x03,0x4a,0x04] +// GFX942: v_mfma_i32_16x16x4_4b_i8 a[0:15], v0, v1, a[18:33] ; encoding: [0x00,0x80,0xd1,0xd3,0x00,0x03,0x4a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_i32_16x16x4_4b_i8 v[0:15], v0, v1, v[18:33] -// GFX940: v_mfma_i32_16x16x4_4b_i8 v[0:15], v0, v1, v[18:33] ; encoding: [0x00,0x00,0xd1,0xd3,0x00,0x03,0x4a,0x04] +// GFX942: v_mfma_i32_16x16x4_4b_i8 v[0:15], v0, v1, v[18:33] ; encoding: [0x00,0x00,0xd1,0xd3,0x00,0x03,0x4a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_i32_16x16x4i8 a[0:15], v0, v1, a[18:33] -// GFX940: v_mfma_i32_16x16x4_4b_i8 a[0:15], v0, v1, a[18:33] ; encoding: [0x00,0x80,0xd1,0xd3,0x00,0x03,0x4a,0x04] +// GFX942: v_mfma_i32_16x16x4_4b_i8 a[0:15], v0, v1, a[18:33] ; encoding: [0x00,0x80,0xd1,0xd3,0x00,0x03,0x4a,0x04] v_mfma_i32_16x16x4i8 v[0:15], v0, v1, v[18:33] -// GFX940: v_mfma_i32_16x16x4_4b_i8 v[0:15], v0, v1, v[18:33] ; encoding: [0x00,0x00,0xd1,0xd3,0x00,0x03,0x4a,0x04] +// GFX942: v_mfma_i32_16x16x4_4b_i8 v[0:15], v0, v1, v[18:33] ; encoding: [0x00,0x00,0xd1,0xd3,0x00,0x03,0x4a,0x04] v_mfma_i32_4x4x4_16b_i8 a[0:3], v0, v1, a[2:5] -// GFX940: v_mfma_i32_4x4x4_16b_i8 a[0:3], v0, v1, a[2:5] ; encoding: [0x00,0x80,0xd2,0xd3,0x00,0x03,0x0a,0x04] +// GFX942: v_mfma_i32_4x4x4_16b_i8 a[0:3], v0, v1, a[2:5] ; encoding: [0x00,0x80,0xd2,0xd3,0x00,0x03,0x0a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_i32_4x4x4_16b_i8 v[0:3], v0, v1, v[2:5] -// GFX940: v_mfma_i32_4x4x4_16b_i8 v[0:3], v0, v1, v[2:5] ; encoding: [0x00,0x00,0xd2,0xd3,0x00,0x03,0x0a,0x04] +// GFX942: v_mfma_i32_4x4x4_16b_i8 v[0:3], v0, v1, v[2:5] ; encoding: [0x00,0x00,0xd2,0xd3,0x00,0x03,0x0a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_i32_4x4x4i8 a[0:3], v0, v1, a[2:5] -// GFX940: v_mfma_i32_4x4x4_16b_i8 a[0:3], v0, v1, a[2:5] ; encoding: [0x00,0x80,0xd2,0xd3,0x00,0x03,0x0a,0x04] +// GFX942: v_mfma_i32_4x4x4_16b_i8 a[0:3], v0, v1, a[2:5] ; encoding: [0x00,0x80,0xd2,0xd3,0x00,0x03,0x0a,0x04] v_mfma_i32_4x4x4i8 v[0:3], v0, v1, v[2:5] -// GFX940: v_mfma_i32_4x4x4_16b_i8 v[0:3], v0, v1, v[2:5] ; encoding: [0x00,0x00,0xd2,0xd3,0x00,0x03,0x0a,0x04] +// GFX942: v_mfma_i32_4x4x4_16b_i8 v[0:3], v0, v1, v[2:5] ; encoding: [0x00,0x00,0xd2,0xd3,0x00,0x03,0x0a,0x04] v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[34:65] blgp:7 -// GFX940: v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[34:65] blgp:7 ; encoding: [0x00,0x80,0xc0,0xd3,0x00,0x03,0x8a,0xe4] +// GFX942: v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[34:65] blgp:7 ; encoding: [0x00,0x80,0xc0,0xd3,0x00,0x03,0x8a,0xe4] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_32x32x1_2b_f32 v[0:31], v0, v1, v[34:65] blgp:7 -// GFX940: v_mfma_f32_32x32x1_2b_f32 v[0:31], v0, v1, v[34:65] blgp:7 ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0x8a,0xe4] +// GFX942: v_mfma_f32_32x32x1_2b_f32 v[0:31], v0, v1, v[34:65] blgp:7 ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0x8a,0xe4] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[34:65] blgp:7 -// GFX940: v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[34:65] blgp:7 ; encoding: [0x00,0x80,0xc0,0xd3,0x00,0x03,0x8a,0xe4] +// GFX942: v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[34:65] blgp:7 ; encoding: [0x00,0x80,0xc0,0xd3,0x00,0x03,0x8a,0xe4] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_32x32x1_2b_f32 v[0:31], v0, v1, v[34:65] blgp:7 -// GFX940: v_mfma_f32_32x32x1_2b_f32 v[0:31], v0, v1, v[34:65] blgp:7 ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0x8a,0xe4] +// GFX942: v_mfma_f32_32x32x1_2b_f32 v[0:31], v0, v1, v[34:65] blgp:7 ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0x8a,0xe4] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[34:65] blgp:7 -// GFX940: v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[34:65] blgp:7 ; encoding: [0x00,0x80,0xc0,0xd3,0x00,0x03,0x8a,0xe4] +// GFX942: v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[34:65] blgp:7 ; encoding: [0x00,0x80,0xc0,0xd3,0x00,0x03,0x8a,0xe4] v_mfma_f32_32x32x1f32 v[0:31], v0, v1, v[34:65] blgp:7 -// GFX940: v_mfma_f32_32x32x1_2b_f32 v[0:31], v0, v1, v[34:65] blgp:7 ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0x8a,0xe4] +// GFX942: v_mfma_f32_32x32x1_2b_f32 v[0:31], v0, v1, v[34:65] blgp:7 ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0x8a,0xe4] v_mfma_i32_32x32x16_i8 v[0:15], v[2:3], v[4:5], v[0:15] -// GFX940: v_mfma_i32_32x32x16_i8 v[0:15], v[2:3], v[4:5], v[0:15] ; encoding: [0x00,0x00,0xd6,0xd3,0x02,0x09,0x02,0x04] +// GFX942: v_mfma_i32_32x32x16_i8 v[0:15], v[2:3], v[4:5], v[0:15] ; encoding: [0x00,0x00,0xd6,0xd3,0x02,0x09,0x02,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_i32_32x32x16_i8 a[0:15], v[2:3], v[4:5], a[0:15] -// GFX940: v_mfma_i32_32x32x16_i8 a[0:15], v[2:3], v[4:5], a[0:15] ; encoding: [0x00,0x80,0xd6,0xd3,0x02,0x09,0x02,0x04] +// GFX942: v_mfma_i32_32x32x16_i8 a[0:15], v[2:3], v[4:5], a[0:15] ; encoding: [0x00,0x80,0xd6,0xd3,0x02,0x09,0x02,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_i32_32x32x16_i8 v[0:15], v[2:3], v[4:5], v[0:15] -// GFX940: v_mfma_i32_32x32x16_i8 v[0:15], v[2:3], v[4:5], v[0:15] ; encoding: [0x00,0x00,0xd6,0xd3,0x02,0x09,0x02,0x04] +// GFX942: v_mfma_i32_32x32x16_i8 v[0:15], v[2:3], v[4:5], v[0:15] ; encoding: [0x00,0x00,0xd6,0xd3,0x02,0x09,0x02,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_i32_32x32x16_i8 a[0:15], v[2:3], v[4:5], a[0:15] -// GFX940: v_mfma_i32_32x32x16_i8 a[0:15], v[2:3], v[4:5], a[0:15] ; encoding: [0x00,0x80,0xd6,0xd3,0x02,0x09,0x02,0x04] +// GFX942: v_mfma_i32_32x32x16_i8 a[0:15], v[2:3], v[4:5], a[0:15] ; encoding: [0x00,0x80,0xd6,0xd3,0x02,0x09,0x02,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_i32_32x32x16_i8 a[0:15], v[2:3], v[4:5], a[0:15] blgp:5 -// GFX940: v_mfma_i32_32x32x16_i8 a[0:15], v[2:3], v[4:5], a[0:15] blgp:5 ; encoding: [0x00,0x80,0xd6,0xd3,0x02,0x09,0x02,0xa4] +// GFX942: v_mfma_i32_32x32x16_i8 a[0:15], v[2:3], v[4:5], a[0:15] blgp:5 ; encoding: [0x00,0x80,0xd6,0xd3,0x02,0x09,0x02,0xa4] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_i32_32x32x16i8 a[0:15], v[2:3], v[4:5], a[0:15] blgp:5 -// GFX940: v_mfma_i32_32x32x16_i8 a[0:15], v[2:3], v[4:5], a[0:15] blgp:5 ; encoding: [0x00,0x80,0xd6,0xd3,0x02,0x09,0x02,0xa4] +// GFX942: v_mfma_i32_32x32x16_i8 a[0:15], v[2:3], v[4:5], a[0:15] blgp:5 ; encoding: [0x00,0x80,0xd6,0xd3,0x02,0x09,0x02,0xa4] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_i32_32x32x16i8 v[0:15], v[2:3], v[4:5], v[0:15] blgp:5 -// GFX940: v_mfma_i32_32x32x16_i8 v[0:15], v[2:3], v[4:5], v[0:15] blgp:5 ; encoding: [0x00,0x00,0xd6,0xd3,0x02,0x09,0x02,0xa4] +// GFX942: v_mfma_i32_32x32x16_i8 v[0:15], v[2:3], v[4:5], v[0:15] blgp:5 ; encoding: [0x00,0x00,0xd6,0xd3,0x02,0x09,0x02,0xa4] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_i32_16x16x32_i8 v[0:3], v[2:3], v[4:5], v[0:3] -// GFX940: v_mfma_i32_16x16x32_i8 v[0:3], v[2:3], v[4:5], v[0:3] ; encoding: [0x00,0x00,0xd7,0xd3,0x02,0x09,0x02,0x04] +// GFX942: v_mfma_i32_16x16x32_i8 v[0:3], v[2:3], v[4:5], v[0:3] ; encoding: [0x00,0x00,0xd7,0xd3,0x02,0x09,0x02,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_i32_16x16x32_i8 a[0:3], v[2:3], v[4:5], a[0:3] -// GFX940: v_mfma_i32_16x16x32_i8 a[0:3], v[2:3], v[4:5], a[0:3] ; encoding: [0x00,0x80,0xd7,0xd3,0x02,0x09,0x02,0x04] +// GFX942: v_mfma_i32_16x16x32_i8 a[0:3], v[2:3], v[4:5], a[0:3] ; encoding: [0x00,0x80,0xd7,0xd3,0x02,0x09,0x02,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_i32_16x16x32_i8 a[0:3], v[2:3], v[4:5], a[0:3] blgp:5 -// GFX940: v_mfma_i32_16x16x32_i8 a[0:3], v[2:3], v[4:5], a[0:3] blgp:5 ; encoding: [0x00,0x80,0xd7,0xd3,0x02,0x09,0x02,0xa4] +// GFX942: v_mfma_i32_16x16x32_i8 a[0:3], v[2:3], v[4:5], a[0:3] blgp:5 ; encoding: [0x00,0x80,0xd7,0xd3,0x02,0x09,0x02,0xa4] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_i32_16x16x32i8 v[0:3], v[2:3], v[4:5], v[0:3] blgp:5 -// GFX940: v_mfma_i32_16x16x32_i8 v[0:3], v[2:3], v[4:5], v[0:3] blgp:5 ; encoding: [0x00,0x00,0xd7,0xd3,0x02,0x09,0x02,0xa4] +// GFX942: v_mfma_i32_16x16x32_i8 v[0:3], v[2:3], v[4:5], v[0:3] blgp:5 ; encoding: [0x00,0x00,0xd7,0xd3,0x02,0x09,0x02,0xa4] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_i32_16x16x32i8 a[0:3], v[2:3], v[4:5], a[0:3] blgp:5 -// GFX940: v_mfma_i32_16x16x32_i8 a[0:3], v[2:3], v[4:5], a[0:3] blgp:5 ; encoding: [0x00,0x80,0xd7,0xd3,0x02,0x09,0x02,0xa4] +// GFX942: v_mfma_i32_16x16x32_i8 a[0:3], v[2:3], v[4:5], a[0:3] blgp:5 ; encoding: [0x00,0x80,0xd7,0xd3,0x02,0x09,0x02,0xa4] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_32x32x4_2b_bf16 v[0:31], v[2:3], v[4:5], v[34:65] -// GFX940: v_mfma_f32_32x32x4_2b_bf16 v[0:31], v[2:3], v[4:5], v[34:65] ; encoding: [0x00,0x00,0xdd,0xd3,0x02,0x09,0x8a,0x04] +// GFX942: v_mfma_f32_32x32x4_2b_bf16 v[0:31], v[2:3], v[4:5], v[34:65] ; encoding: [0x00,0x00,0xdd,0xd3,0x02,0x09,0x8a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_32x32x4_2b_bf16 a[0:31], v[2:3], v[4:5], a[34:65] -// GFX940: v_mfma_f32_32x32x4_2b_bf16 a[0:31], v[2:3], v[4:5], a[34:65] ; encoding: [0x00,0x80,0xdd,0xd3,0x02,0x09,0x8a,0x04] +// GFX942: v_mfma_f32_32x32x4_2b_bf16 a[0:31], v[2:3], v[4:5], a[34:65] ; encoding: [0x00,0x80,0xdd,0xd3,0x02,0x09,0x8a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_32x32x4_2b_bf16 v[0:31], v[2:3], v[4:5], v[34:65] -// GFX940: v_mfma_f32_32x32x4_2b_bf16 v[0:31], v[2:3], v[4:5], v[34:65] ; encoding: [0x00,0x00,0xdd,0xd3,0x02,0x09,0x8a,0x04] +// GFX942: v_mfma_f32_32x32x4_2b_bf16 v[0:31], v[2:3], v[4:5], v[34:65] ; encoding: [0x00,0x00,0xdd,0xd3,0x02,0x09,0x8a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_32x32x4_2b_bf16 a[0:31], v[2:3], v[4:5], a[34:65] -// GFX940: v_mfma_f32_32x32x4_2b_bf16 a[0:31], v[2:3], v[4:5], a[34:65] ; encoding: [0x00,0x80,0xdd,0xd3,0x02,0x09,0x8a,0x04] +// GFX942: v_mfma_f32_32x32x4_2b_bf16 a[0:31], v[2:3], v[4:5], a[34:65] ; encoding: [0x00,0x80,0xdd,0xd3,0x02,0x09,0x8a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_32x32x4bf16 v[0:31], v[2:3], v[4:5], v[34:65] blgp:5 -// GFX940: v_mfma_f32_32x32x4_2b_bf16 v[0:31], v[2:3], v[4:5], v[34:65] blgp:5 ; encoding: [0x00,0x00,0xdd,0xd3,0x02,0x09,0x8a,0xa4] +// GFX942: v_mfma_f32_32x32x4_2b_bf16 v[0:31], v[2:3], v[4:5], v[34:65] blgp:5 ; encoding: [0x00,0x00,0xdd,0xd3,0x02,0x09,0x8a,0xa4] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_mfma_f32_32x32x4bf16 a[0:31], v[2:3], v[4:5], a[34:65] blgp:5 -// GFX940: v_mfma_f32_32x32x4_2b_bf16 a[0:31], v[2:3], v[4:5], a[34:65] blgp:5 ; encoding: [0x00,0x80,0xdd,0xd3,0x02,0x09,0x8a,0xa4] +// GFX942: v_mfma_f32_32x32x4_2b_bf16 a[0:31], v[2:3], v[4:5], a[34:65] blgp:5 ; encoding: [0x00,0x80,0xdd,0xd3,0x02,0x09,0x8a,0xa4] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_mfma_f32_32x32x4bf16_1k v[0:31], v[2:3], v[4:5], v[34:65] blgp:5 -// GFX940: v_mfma_f32_32x32x4_2b_bf16 v[0:31], v[2:3], v[4:5], v[34:65] blgp:5 ; encoding: [0x00,0x00,0xdd,0xd3,0x02,0x09,0x8a,0xa4] +// GFX942: v_mfma_f32_32x32x4_2b_bf16 v[0:31], v[2:3], v[4:5], v[34:65] blgp:5 ; encoding: [0x00,0x00,0xdd,0xd3,0x02,0x09,0x8a,0xa4] v_mfma_f32_32x32x4bf16_1k a[0:31], v[2:3], v[4:5], a[34:65] blgp:5 -// GFX940: v_mfma_f32_32x32x4_2b_bf16 a[0:31], v[2:3], v[4:5], a[34:65] blgp:5 ; encoding: [0x00,0x80,0xdd,0xd3,0x02,0x09,0x8a,0xa4] +// GFX942: v_mfma_f32_32x32x4_2b_bf16 a[0:31], v[2:3], v[4:5], a[34:65] blgp:5 ; encoding: [0x00,0x80,0xdd,0xd3,0x02,0x09,0x8a,0xa4] v_mfma_f32_16x16x4_4b_bf16 v[0:15], v[2:3], v[4:5], v[18:33] -// GFX940: v_mfma_f32_16x16x4_4b_bf16 v[0:15], v[2:3], v[4:5], v[18:33] ; encoding: [0x00,0x00,0xde,0xd3,0x02,0x09,0x4a,0x04] +// GFX942: v_mfma_f32_16x16x4_4b_bf16 v[0:15], v[2:3], v[4:5], v[18:33] ; encoding: [0x00,0x00,0xde,0xd3,0x02,0x09,0x4a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_16x16x4_4b_bf16 a[0:15], v[2:3], v[4:5], a[18:33] -// GFX940: v_mfma_f32_16x16x4_4b_bf16 a[0:15], v[2:3], v[4:5], a[18:33] ; encoding: [0x00,0x80,0xde,0xd3,0x02,0x09,0x4a,0x04] +// GFX942: v_mfma_f32_16x16x4_4b_bf16 a[0:15], v[2:3], v[4:5], a[18:33] ; encoding: [0x00,0x80,0xde,0xd3,0x02,0x09,0x4a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_16x16x4bf16 v[0:15], v[2:3], v[4:5], v[18:33] blgp:5 -// GFX940: v_mfma_f32_16x16x4_4b_bf16 v[0:15], v[2:3], v[4:5], v[18:33] blgp:5 ; encoding: [0x00,0x00,0xde,0xd3,0x02,0x09,0x4a,0xa4] +// GFX942: v_mfma_f32_16x16x4_4b_bf16 v[0:15], v[2:3], v[4:5], v[18:33] blgp:5 ; encoding: [0x00,0x00,0xde,0xd3,0x02,0x09,0x4a,0xa4] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_16x16x4bf16 a[0:15], v[2:3], v[4:5], a[18:33] blgp:5 -// GFX940: v_mfma_f32_16x16x4_4b_bf16 a[0:15], v[2:3], v[4:5], a[18:33] blgp:5 ; encoding: [0x00,0x80,0xde,0xd3,0x02,0x09,0x4a,0xa4] +// GFX942: v_mfma_f32_16x16x4_4b_bf16 a[0:15], v[2:3], v[4:5], a[18:33] blgp:5 ; encoding: [0x00,0x80,0xde,0xd3,0x02,0x09,0x4a,0xa4] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_16x16x4bf16_1k v[0:15], v[2:3], v[4:5], v[18:33] blgp:5 -// GFX940: v_mfma_f32_16x16x4_4b_bf16 v[0:15], v[2:3], v[4:5], v[18:33] blgp:5 ; encoding: [0x00,0x00,0xde,0xd3,0x02,0x09,0x4a,0xa4] +// GFX942: v_mfma_f32_16x16x4_4b_bf16 v[0:15], v[2:3], v[4:5], v[18:33] blgp:5 ; encoding: [0x00,0x00,0xde,0xd3,0x02,0x09,0x4a,0xa4] v_mfma_f32_16x16x4bf16_1k a[0:15], v[2:3], v[4:5], a[18:33] blgp:5 -// GFX940: v_mfma_f32_16x16x4_4b_bf16 a[0:15], v[2:3], v[4:5], a[18:33] blgp:5 ; encoding: [0x00,0x80,0xde,0xd3,0x02,0x09,0x4a,0xa4] +// GFX942: v_mfma_f32_16x16x4_4b_bf16 a[0:15], v[2:3], v[4:5], a[18:33] blgp:5 ; encoding: [0x00,0x80,0xde,0xd3,0x02,0x09,0x4a,0xa4] v_mfma_f32_4x4x4_16b_bf16 v[0:3], v[2:3], v[4:5], v[2:5] -// GFX940: v_mfma_f32_4x4x4_16b_bf16 v[0:3], v[2:3], v[4:5], v[2:5] ; encoding: [0x00,0x00,0xdf,0xd3,0x02,0x09,0x0a,0x04] +// GFX942: v_mfma_f32_4x4x4_16b_bf16 v[0:3], v[2:3], v[4:5], v[2:5] ; encoding: [0x00,0x00,0xdf,0xd3,0x02,0x09,0x0a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_4x4x4_16b_bf16 a[0:3], v[2:3], v[4:5], a[2:5] -// GFX940: v_mfma_f32_4x4x4_16b_bf16 a[0:3], v[2:3], v[4:5], a[2:5] ; encoding: [0x00,0x80,0xdf,0xd3,0x02,0x09,0x0a,0x04] +// GFX942: v_mfma_f32_4x4x4_16b_bf16 a[0:3], v[2:3], v[4:5], a[2:5] ; encoding: [0x00,0x80,0xdf,0xd3,0x02,0x09,0x0a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_4x4x4bf16 v[0:3], v[2:3], v[4:5], v[2:5] -// GFX940: v_mfma_f32_4x4x4_16b_bf16 v[0:3], v[2:3], v[4:5], v[2:5] ; encoding: [0x00,0x00,0xdf,0xd3,0x02,0x09,0x0a,0x04] +// GFX942: v_mfma_f32_4x4x4_16b_bf16 v[0:3], v[2:3], v[4:5], v[2:5] ; encoding: [0x00,0x00,0xdf,0xd3,0x02,0x09,0x0a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_4x4x4bf16 a[0:3], v[2:3], v[4:5], a[2:5] -// GFX940: v_mfma_f32_4x4x4_16b_bf16 a[0:3], v[2:3], v[4:5], a[2:5] ; encoding: [0x00,0x80,0xdf,0xd3,0x02,0x09,0x0a,0x04] +// GFX942: v_mfma_f32_4x4x4_16b_bf16 a[0:3], v[2:3], v[4:5], a[2:5] ; encoding: [0x00,0x80,0xdf,0xd3,0x02,0x09,0x0a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_4x4x4bf16_1k v[0:3], v[2:3], v[4:5], v[2:5] -// GFX940: v_mfma_f32_4x4x4_16b_bf16 v[0:3], v[2:3], v[4:5], v[2:5] ; encoding: [0x00,0x00,0xdf,0xd3,0x02,0x09,0x0a,0x04] +// GFX942: v_mfma_f32_4x4x4_16b_bf16 v[0:3], v[2:3], v[4:5], v[2:5] ; encoding: [0x00,0x00,0xdf,0xd3,0x02,0x09,0x0a,0x04] v_mfma_f32_4x4x4bf16_1k a[0:3], v[2:3], v[4:5], a[2:5] -// GFX940: v_mfma_f32_4x4x4_16b_bf16 a[0:3], v[2:3], v[4:5], a[2:5] ; encoding: [0x00,0x80,0xdf,0xd3,0x02,0x09,0x0a,0x04] +// GFX942: v_mfma_f32_4x4x4_16b_bf16 a[0:3], v[2:3], v[4:5], a[2:5] ; encoding: [0x00,0x80,0xdf,0xd3,0x02,0x09,0x0a,0x04] v_mfma_f32_32x32x8_bf16 v[0:15], v[2:3], v[4:5], v[18:33] -// GFX940: v_mfma_f32_32x32x8_bf16 v[0:15], v[2:3], v[4:5], v[18:33] ; encoding: [0x00,0x00,0xe0,0xd3,0x02,0x09,0x4a,0x04] +// GFX942: v_mfma_f32_32x32x8_bf16 v[0:15], v[2:3], v[4:5], v[18:33] ; encoding: [0x00,0x00,0xe0,0xd3,0x02,0x09,0x4a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_32x32x8_bf16 a[0:15], v[2:3], v[4:5], a[18:33] -// GFX940: v_mfma_f32_32x32x8_bf16 a[0:15], v[2:3], v[4:5], a[18:33] ; encoding: [0x00,0x80,0xe0,0xd3,0x02,0x09,0x4a,0x04] +// GFX942: v_mfma_f32_32x32x8_bf16 a[0:15], v[2:3], v[4:5], a[18:33] ; encoding: [0x00,0x80,0xe0,0xd3,0x02,0x09,0x4a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_32x32x8bf16 v[0:15], v[2:3], v[4:5], v[18:33] -// GFX940: v_mfma_f32_32x32x8_bf16 v[0:15], v[2:3], v[4:5], v[18:33] ; encoding: [0x00,0x00,0xe0,0xd3,0x02,0x09,0x4a,0x04] +// GFX942: v_mfma_f32_32x32x8_bf16 v[0:15], v[2:3], v[4:5], v[18:33] ; encoding: [0x00,0x00,0xe0,0xd3,0x02,0x09,0x4a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_32x32x8bf16 a[0:15], v[2:3], v[4:5], a[18:33] -// GFX940: v_mfma_f32_32x32x8_bf16 a[0:15], v[2:3], v[4:5], a[18:33] ; encoding: [0x00,0x80,0xe0,0xd3,0x02,0x09,0x4a,0x04] +// GFX942: v_mfma_f32_32x32x8_bf16 a[0:15], v[2:3], v[4:5], a[18:33] ; encoding: [0x00,0x80,0xe0,0xd3,0x02,0x09,0x4a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_32x32x8bf16_1k v[0:15], v[2:3], v[4:5], v[18:33] -// GFX940: v_mfma_f32_32x32x8_bf16 v[0:15], v[2:3], v[4:5], v[18:33] ; encoding: [0x00,0x00,0xe0,0xd3,0x02,0x09,0x4a,0x04] +// GFX942: v_mfma_f32_32x32x8_bf16 v[0:15], v[2:3], v[4:5], v[18:33] ; encoding: [0x00,0x00,0xe0,0xd3,0x02,0x09,0x4a,0x04] v_mfma_f32_32x32x8bf16_1k a[0:15], v[2:3], v[4:5], a[18:33] -// GFX940: v_mfma_f32_32x32x8_bf16 a[0:15], v[2:3], v[4:5], a[18:33] ; encoding: [0x00,0x80,0xe0,0xd3,0x02,0x09,0x4a,0x04] +// GFX942: v_mfma_f32_32x32x8_bf16 a[0:15], v[2:3], v[4:5], a[18:33] ; encoding: [0x00,0x80,0xe0,0xd3,0x02,0x09,0x4a,0x04] v_mfma_f32_16x16x16_bf16 v[0:3], v[2:3], v[4:5], v[2:5] -// GFX940: v_mfma_f32_16x16x16_bf16 v[0:3], v[2:3], v[4:5], v[2:5] ; encoding: [0x00,0x00,0xe1,0xd3,0x02,0x09,0x0a,0x04] +// GFX942: v_mfma_f32_16x16x16_bf16 v[0:3], v[2:3], v[4:5], v[2:5] ; encoding: [0x00,0x00,0xe1,0xd3,0x02,0x09,0x0a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_16x16x16_bf16 a[0:3], v[2:3], v[4:5], a[2:5] -// GFX940: v_mfma_f32_16x16x16_bf16 a[0:3], v[2:3], v[4:5], a[2:5] ; encoding: [0x00,0x80,0xe1,0xd3,0x02,0x09,0x0a,0x04] +// GFX942: v_mfma_f32_16x16x16_bf16 a[0:3], v[2:3], v[4:5], a[2:5] ; encoding: [0x00,0x80,0xe1,0xd3,0x02,0x09,0x0a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_16x16x16bf16 v[0:3], v[2:3], v[4:5], v[2:5] -// GFX940: v_mfma_f32_16x16x16_bf16 v[0:3], v[2:3], v[4:5], v[2:5] ; encoding: [0x00,0x00,0xe1,0xd3,0x02,0x09,0x0a,0x04] +// GFX942: v_mfma_f32_16x16x16_bf16 v[0:3], v[2:3], v[4:5], v[2:5] ; encoding: [0x00,0x00,0xe1,0xd3,0x02,0x09,0x0a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_16x16x16bf16 a[0:3], v[2:3], v[4:5], a[2:5] -// GFX940: v_mfma_f32_16x16x16_bf16 a[0:3], v[2:3], v[4:5], a[2:5] ; encoding: [0x00,0x80,0xe1,0xd3,0x02,0x09,0x0a,0x04] +// GFX942: v_mfma_f32_16x16x16_bf16 a[0:3], v[2:3], v[4:5], a[2:5] ; encoding: [0x00,0x80,0xe1,0xd3,0x02,0x09,0x0a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_16x16x16bf16_1k v[0:3], v[2:3], v[4:5], v[2:5] -// GFX940: v_mfma_f32_16x16x16_bf16 v[0:3], v[2:3], v[4:5], v[2:5] ; encoding: [0x00,0x00,0xe1,0xd3,0x02,0x09,0x0a,0x04] +// GFX942: v_mfma_f32_16x16x16_bf16 v[0:3], v[2:3], v[4:5], v[2:5] ; encoding: [0x00,0x00,0xe1,0xd3,0x02,0x09,0x0a,0x04] v_mfma_f32_16x16x16bf16_1k a[0:3], v[2:3], v[4:5], a[2:5] -// GFX940: v_mfma_f32_16x16x16_bf16 a[0:3], v[2:3], v[4:5], a[2:5] ; encoding: [0x00,0x80,0xe1,0xd3,0x02,0x09,0x0a,0x04] +// GFX942: v_mfma_f32_16x16x16_bf16 a[0:3], v[2:3], v[4:5], a[2:5] ; encoding: [0x00,0x80,0xe1,0xd3,0x02,0x09,0x0a,0x04] v_mfma_f32_16x16x8_xf32 a[0:3], v[2:3], v[4:5], a[2:5] -// GFX940: v_mfma_f32_16x16x8_xf32 a[0:3], v[2:3], v[4:5], a[2:5] ; encoding: [0x00,0x80,0xbe,0xd3,0x02,0x09,0x0a,0x04] +// GFX942: v_mfma_f32_16x16x8_xf32 a[0:3], v[2:3], v[4:5], a[2:5] ; encoding: [0x00,0x80,0xbe,0xd3,0x02,0x09,0x0a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_16x16x8_xf32 v[0:3], v[2:3], v[4:5], v[2:5] -// GFX940: v_mfma_f32_16x16x8_xf32 v[0:3], v[2:3], v[4:5], v[2:5] ; encoding: [0x00,0x00,0xbe,0xd3,0x02,0x09,0x0a,0x04] +// GFX942: v_mfma_f32_16x16x8_xf32 v[0:3], v[2:3], v[4:5], v[2:5] ; encoding: [0x00,0x00,0xbe,0xd3,0x02,0x09,0x0a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_16x16x8xf32 a[0:3], v[2:3], v[4:5], a[2:5] -// GFX940: v_mfma_f32_16x16x8_xf32 a[0:3], v[2:3], v[4:5], a[2:5] ; encoding: [0x00,0x80,0xbe,0xd3,0x02,0x09,0x0a,0x04] +// GFX942: v_mfma_f32_16x16x8_xf32 a[0:3], v[2:3], v[4:5], a[2:5] ; encoding: [0x00,0x80,0xbe,0xd3,0x02,0x09,0x0a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_16x16x8xf32 v[0:3], v[2:3], v[4:5], v[2:5] -// GFX940: v_mfma_f32_16x16x8_xf32 v[0:3], v[2:3], v[4:5], v[2:5] ; encoding: [0x00,0x00,0xbe,0xd3,0x02,0x09,0x0a,0x04] +// GFX942: v_mfma_f32_16x16x8_xf32 v[0:3], v[2:3], v[4:5], v[2:5] ; encoding: [0x00,0x00,0xbe,0xd3,0x02,0x09,0x0a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_32x32x4_xf32 v[0:15], v[2:3], v[4:5], v[18:33] -// GFX940: v_mfma_f32_32x32x4_xf32 v[0:15], v[2:3], v[4:5], v[18:33] ; encoding: [0x00,0x00,0xbf,0xd3,0x02,0x09,0x4a,0x04] +// GFX942: v_mfma_f32_32x32x4_xf32 v[0:15], v[2:3], v[4:5], v[18:33] ; encoding: [0x00,0x00,0xbf,0xd3,0x02,0x09,0x4a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_32x32x4_xf32 a[0:15], v[2:3], v[4:5], a[18:33] -// GFX940: v_mfma_f32_32x32x4_xf32 a[0:15], v[2:3], v[4:5], a[18:33] ; encoding: [0x00,0x80,0xbf,0xd3,0x02,0x09,0x4a,0x04] +// GFX942: v_mfma_f32_32x32x4_xf32 a[0:15], v[2:3], v[4:5], a[18:33] ; encoding: [0x00,0x80,0xbf,0xd3,0x02,0x09,0x4a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_32x32x4xf32 v[0:15], v[2:3], v[4:5], v[18:33] -// GFX940: v_mfma_f32_32x32x4_xf32 v[0:15], v[2:3], v[4:5], v[18:33] ; encoding: [0x00,0x00,0xbf,0xd3,0x02,0x09,0x4a,0x04] +// GFX942: v_mfma_f32_32x32x4_xf32 v[0:15], v[2:3], v[4:5], v[18:33] ; encoding: [0x00,0x00,0xbf,0xd3,0x02,0x09,0x4a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_32x32x4xf32 a[0:15], v[2:3], v[4:5], a[18:33] -// GFX940: v_mfma_f32_32x32x4_xf32 a[0:15], v[2:3], v[4:5], a[18:33] ; encoding: [0x00,0x80,0xbf,0xd3,0x02,0x09,0x4a,0x04] +// GFX942: v_mfma_f32_32x32x4_xf32 a[0:15], v[2:3], v[4:5], a[18:33] ; encoding: [0x00,0x80,0xbf,0xd3,0x02,0x09,0x4a,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_16x16x32_bf8_bf8 v[0:3], v[2:3], v[4:5], v[0:3] -// GFX940: v_mfma_f32_16x16x32_bf8_bf8 v[0:3], v[2:3], v[4:5], v[0:3] ; encoding: [0x00,0x00,0xf0,0xd3,0x02,0x09,0x02,0x04] +// GFX942: v_mfma_f32_16x16x32_bf8_bf8 v[0:3], v[2:3], v[4:5], v[0:3] ; encoding: [0x00,0x00,0xf0,0xd3,0x02,0x09,0x02,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_16x16x32_bf8_bf8 a[0:3], v[2:3], v[4:5], a[0:3] -// GFX940: v_mfma_f32_16x16x32_bf8_bf8 a[0:3], v[2:3], v[4:5], a[0:3] ; encoding: [0x00,0x80,0xf0,0xd3,0x02,0x09,0x02,0x04] +// GFX942: v_mfma_f32_16x16x32_bf8_bf8 a[0:3], v[2:3], v[4:5], a[0:3] ; encoding: [0x00,0x80,0xf0,0xd3,0x02,0x09,0x02,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_16x16x32_bf8_bf8 a[0:3], v[2:3], v[4:5], a[0:3] blgp:5 -// GFX940: v_mfma_f32_16x16x32_bf8_bf8 a[0:3], v[2:3], v[4:5], a[0:3] blgp:5 ; encoding: [0x00,0x80,0xf0,0xd3,0x02,0x09,0x02,0xa4] +// GFX942: v_mfma_f32_16x16x32_bf8_bf8 a[0:3], v[2:3], v[4:5], a[0:3] blgp:5 ; encoding: [0x00,0x80,0xf0,0xd3,0x02,0x09,0x02,0xa4] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_16x16x32_bf8_fp8 v[0:3], v[2:3], v[4:5], v[0:3] -// GFX940: v_mfma_f32_16x16x32_bf8_fp8 v[0:3], v[2:3], v[4:5], v[0:3] ; encoding: [0x00,0x00,0xf1,0xd3,0x02,0x09,0x02,0x04] +// GFX942: v_mfma_f32_16x16x32_bf8_fp8 v[0:3], v[2:3], v[4:5], v[0:3] ; encoding: [0x00,0x00,0xf1,0xd3,0x02,0x09,0x02,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_16x16x32_bf8_fp8 a[0:3], v[2:3], v[4:5], a[0:3] -// GFX940: v_mfma_f32_16x16x32_bf8_fp8 a[0:3], v[2:3], v[4:5], a[0:3] ; encoding: [0x00,0x80,0xf1,0xd3,0x02,0x09,0x02,0x04] +// GFX942: v_mfma_f32_16x16x32_bf8_fp8 a[0:3], v[2:3], v[4:5], a[0:3] ; encoding: [0x00,0x80,0xf1,0xd3,0x02,0x09,0x02,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_16x16x32_bf8_fp8 a[0:3], v[2:3], v[4:5], a[0:3] blgp:5 -// GFX940: v_mfma_f32_16x16x32_bf8_fp8 a[0:3], v[2:3], v[4:5], a[0:3] blgp:5 ; encoding: [0x00,0x80,0xf1,0xd3,0x02,0x09,0x02,0xa4] +// GFX942: v_mfma_f32_16x16x32_bf8_fp8 a[0:3], v[2:3], v[4:5], a[0:3] blgp:5 ; encoding: [0x00,0x80,0xf1,0xd3,0x02,0x09,0x02,0xa4] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_16x16x32_fp8_bf8 v[0:3], v[2:3], v[4:5], v[0:3] -// GFX940: v_mfma_f32_16x16x32_fp8_bf8 v[0:3], v[2:3], v[4:5], v[0:3] ; encoding: [0x00,0x00,0xf2,0xd3,0x02,0x09,0x02,0x04] +// GFX942: v_mfma_f32_16x16x32_fp8_bf8 v[0:3], v[2:3], v[4:5], v[0:3] ; encoding: [0x00,0x00,0xf2,0xd3,0x02,0x09,0x02,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_16x16x32_fp8_bf8 a[0:3], v[2:3], v[4:5], a[0:3] -// GFX940: v_mfma_f32_16x16x32_fp8_bf8 a[0:3], v[2:3], v[4:5], a[0:3] ; encoding: [0x00,0x80,0xf2,0xd3,0x02,0x09,0x02,0x04] +// GFX942: v_mfma_f32_16x16x32_fp8_bf8 a[0:3], v[2:3], v[4:5], a[0:3] ; encoding: [0x00,0x80,0xf2,0xd3,0x02,0x09,0x02,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_16x16x32_fp8_bf8 a[0:3], v[2:3], v[4:5], a[0:3] blgp:5 -// GFX940: v_mfma_f32_16x16x32_fp8_bf8 a[0:3], v[2:3], v[4:5], a[0:3] blgp:5 ; encoding: [0x00,0x80,0xf2,0xd3,0x02,0x09,0x02,0xa4] +// GFX942: v_mfma_f32_16x16x32_fp8_bf8 a[0:3], v[2:3], v[4:5], a[0:3] blgp:5 ; encoding: [0x00,0x80,0xf2,0xd3,0x02,0x09,0x02,0xa4] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_16x16x32_fp8_fp8 v[0:3], v[2:3], v[4:5], v[0:3] -// GFX940: v_mfma_f32_16x16x32_fp8_fp8 v[0:3], v[2:3], v[4:5], v[0:3] ; encoding: [0x00,0x00,0xf3,0xd3,0x02,0x09,0x02,0x04] +// GFX942: v_mfma_f32_16x16x32_fp8_fp8 v[0:3], v[2:3], v[4:5], v[0:3] ; encoding: [0x00,0x00,0xf3,0xd3,0x02,0x09,0x02,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_16x16x32_fp8_fp8 a[0:3], v[2:3], v[4:5], a[0:3] -// GFX940: v_mfma_f32_16x16x32_fp8_fp8 a[0:3], v[2:3], v[4:5], a[0:3] ; encoding: [0x00,0x80,0xf3,0xd3,0x02,0x09,0x02,0x04] +// GFX942: v_mfma_f32_16x16x32_fp8_fp8 a[0:3], v[2:3], v[4:5], a[0:3] ; encoding: [0x00,0x80,0xf3,0xd3,0x02,0x09,0x02,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_16x16x32_fp8_fp8 a[0:3], v[2:3], v[4:5], a[0:3] blgp:5 -// GFX940: v_mfma_f32_16x16x32_fp8_fp8 a[0:3], v[2:3], v[4:5], a[0:3] blgp:5 ; encoding: [0x00,0x80,0xf3,0xd3,0x02,0x09,0x02,0xa4] +// GFX942: v_mfma_f32_16x16x32_fp8_fp8 a[0:3], v[2:3], v[4:5], a[0:3] blgp:5 ; encoding: [0x00,0x80,0xf3,0xd3,0x02,0x09,0x02,0xa4] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_32x32x16_bf8_bf8 v[0:15], v[2:3], v[4:5], v[0:15] -// GFX940: v_mfma_f32_32x32x16_bf8_bf8 v[0:15], v[2:3], v[4:5], v[0:15] ; encoding: [0x00,0x00,0xf4,0xd3,0x02,0x09,0x02,0x04] +// GFX942: v_mfma_f32_32x32x16_bf8_bf8 v[0:15], v[2:3], v[4:5], v[0:15] ; encoding: [0x00,0x00,0xf4,0xd3,0x02,0x09,0x02,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_32x32x16_bf8_bf8 a[0:15], v[2:3], v[4:5], a[0:15] -// GFX940: v_mfma_f32_32x32x16_bf8_bf8 a[0:15], v[2:3], v[4:5], a[0:15] ; encoding: [0x00,0x80,0xf4,0xd3,0x02,0x09,0x02,0x04] +// GFX942: v_mfma_f32_32x32x16_bf8_bf8 a[0:15], v[2:3], v[4:5], a[0:15] ; encoding: [0x00,0x80,0xf4,0xd3,0x02,0x09,0x02,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_32x32x16_bf8_bf8 a[0:15], v[2:3], v[4:5], a[0:15] blgp:5 -// GFX940: v_mfma_f32_32x32x16_bf8_bf8 a[0:15], v[2:3], v[4:5], a[0:15] blgp:5 ; encoding: [0x00,0x80,0xf4,0xd3,0x02,0x09,0x02,0xa4] +// GFX942: v_mfma_f32_32x32x16_bf8_bf8 a[0:15], v[2:3], v[4:5], a[0:15] blgp:5 ; encoding: [0x00,0x80,0xf4,0xd3,0x02,0x09,0x02,0xa4] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_32x32x16_bf8_fp8 v[0:15], v[2:3], v[4:5], v[0:15] -// GFX940: v_mfma_f32_32x32x16_bf8_fp8 v[0:15], v[2:3], v[4:5], v[0:15] ; encoding: [0x00,0x00,0xf5,0xd3,0x02,0x09,0x02,0x04] +// GFX942: v_mfma_f32_32x32x16_bf8_fp8 v[0:15], v[2:3], v[4:5], v[0:15] ; encoding: [0x00,0x00,0xf5,0xd3,0x02,0x09,0x02,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_32x32x16_bf8_fp8 a[0:15], v[2:3], v[4:5], a[0:15] -// GFX940: v_mfma_f32_32x32x16_bf8_fp8 a[0:15], v[2:3], v[4:5], a[0:15] ; encoding: [0x00,0x80,0xf5,0xd3,0x02,0x09,0x02,0x04] +// GFX942: v_mfma_f32_32x32x16_bf8_fp8 a[0:15], v[2:3], v[4:5], a[0:15] ; encoding: [0x00,0x80,0xf5,0xd3,0x02,0x09,0x02,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_32x32x16_bf8_fp8 a[0:15], v[2:3], v[4:5], a[0:15] blgp:5 -// GFX940: v_mfma_f32_32x32x16_bf8_fp8 a[0:15], v[2:3], v[4:5], a[0:15] blgp:5 ; encoding: [0x00,0x80,0xf5,0xd3,0x02,0x09,0x02,0xa4] +// GFX942: v_mfma_f32_32x32x16_bf8_fp8 a[0:15], v[2:3], v[4:5], a[0:15] blgp:5 ; encoding: [0x00,0x80,0xf5,0xd3,0x02,0x09,0x02,0xa4] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_32x32x16_fp8_bf8 v[0:15], v[2:3], v[4:5], v[0:15] -// GFX940: v_mfma_f32_32x32x16_fp8_bf8 v[0:15], v[2:3], v[4:5], v[0:15] ; encoding: [0x00,0x00,0xf6,0xd3,0x02,0x09,0x02,0x04] +// GFX942: v_mfma_f32_32x32x16_fp8_bf8 v[0:15], v[2:3], v[4:5], v[0:15] ; encoding: [0x00,0x00,0xf6,0xd3,0x02,0x09,0x02,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_32x32x16_fp8_bf8 a[0:15], v[2:3], v[4:5], a[0:15] -// GFX940: v_mfma_f32_32x32x16_fp8_bf8 a[0:15], v[2:3], v[4:5], a[0:15] ; encoding: [0x00,0x80,0xf6,0xd3,0x02,0x09,0x02,0x04] +// GFX942: v_mfma_f32_32x32x16_fp8_bf8 a[0:15], v[2:3], v[4:5], a[0:15] ; encoding: [0x00,0x80,0xf6,0xd3,0x02,0x09,0x02,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_32x32x16_fp8_bf8 a[0:15], v[2:3], v[4:5], a[0:15] blgp:5 -// GFX940: v_mfma_f32_32x32x16_fp8_bf8 a[0:15], v[2:3], v[4:5], a[0:15] blgp:5 ; encoding: [0x00,0x80,0xf6,0xd3,0x02,0x09,0x02,0xa4] +// GFX942: v_mfma_f32_32x32x16_fp8_bf8 a[0:15], v[2:3], v[4:5], a[0:15] blgp:5 ; encoding: [0x00,0x80,0xf6,0xd3,0x02,0x09,0x02,0xa4] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_32x32x16_fp8_fp8 v[0:15], v[2:3], v[4:5], v[0:15] -// GFX940: v_mfma_f32_32x32x16_fp8_fp8 v[0:15], v[2:3], v[4:5], v[0:15] ; encoding: [0x00,0x00,0xf7,0xd3,0x02,0x09,0x02,0x04] +// GFX942: v_mfma_f32_32x32x16_fp8_fp8 v[0:15], v[2:3], v[4:5], v[0:15] ; encoding: [0x00,0x00,0xf7,0xd3,0x02,0x09,0x02,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_32x32x16_fp8_fp8 a[0:15], v[2:3], v[4:5], a[0:15] -// GFX940: v_mfma_f32_32x32x16_fp8_fp8 a[0:15], v[2:3], v[4:5], a[0:15] ; encoding: [0x00,0x80,0xf7,0xd3,0x02,0x09,0x02,0x04] +// GFX942: v_mfma_f32_32x32x16_fp8_fp8 a[0:15], v[2:3], v[4:5], a[0:15] ; encoding: [0x00,0x80,0xf7,0xd3,0x02,0x09,0x02,0x04] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_mfma_f32_32x32x16_fp8_fp8 a[0:15], v[2:3], v[4:5], a[0:15] blgp:5 -// GFX940: v_mfma_f32_32x32x16_fp8_fp8 a[0:15], v[2:3], v[4:5], a[0:15] blgp:5 ; encoding: [0x00,0x80,0xf7,0xd3,0x02,0x09,0x02,0xa4] +// GFX942: v_mfma_f32_32x32x16_fp8_fp8 a[0:15], v[2:3], v[4:5], a[0:15] blgp:5 ; encoding: [0x00,0x80,0xf7,0xd3,0x02,0x09,0x02,0xa4] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU //===----------------------------------------------------------------------===// @@ -569,115 +569,115 @@ v_mfma_f32_32x32x16_fp8_fp8 a[0:15], v[2:3], v[4:5], a[0:15] blgp:5 //===----------------------------------------------------------------------===// v_smfmac_f32_16x16x32_f16 v[10:13], a[2:3], v[4:7], v0 cbsz:3 abid:1 -// GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], a[2:3], v[4:7], v0 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xe2,0xd3,0x02,0x09,0x02,0x0c] +// GFX942: v_smfmac_f32_16x16x32_f16 v[10:13], a[2:3], v[4:7], v0 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xe2,0xd3,0x02,0x09,0x02,0x0c] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_f32_16x16x32_f16 a[10:13], v[2:3], a[4:7], v1 -// GFX940: v_smfmac_f32_16x16x32_f16 a[10:13], v[2:3], a[4:7], v1 ; encoding: [0x0a,0x80,0xe2,0xd3,0x02,0x09,0x06,0x14] +// GFX942: v_smfmac_f32_16x16x32_f16 a[10:13], v[2:3], a[4:7], v1 ; encoding: [0x0a,0x80,0xe2,0xd3,0x02,0x09,0x06,0x14] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_f32_32x32x16_f16 v[10:25], a[2:3], v[4:7], v2 cbsz:3 abid:1 -// GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], a[2:3], v[4:7], v2 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xe4,0xd3,0x02,0x09,0x0a,0x0c] +// GFX942: v_smfmac_f32_32x32x16_f16 v[10:25], a[2:3], v[4:7], v2 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xe4,0xd3,0x02,0x09,0x0a,0x0c] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_f32_32x32x16_f16 a[10:25], v[2:3], a[4:7], v3 -// GFX940: v_smfmac_f32_32x32x16_f16 a[10:25], v[2:3], a[4:7], v3 ; encoding: [0x0a,0x80,0xe4,0xd3,0x02,0x09,0x0e,0x14] +// GFX942: v_smfmac_f32_32x32x16_f16 a[10:25], v[2:3], a[4:7], v3 ; encoding: [0x0a,0x80,0xe4,0xd3,0x02,0x09,0x0e,0x14] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_f32_16x16x32_bf16 v[10:13], a[2:3], v[4:7], v4 cbsz:3 abid:1 -// GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], a[2:3], v[4:7], v4 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xe6,0xd3,0x02,0x09,0x12,0x0c] +// GFX942: v_smfmac_f32_16x16x32_bf16 v[10:13], a[2:3], v[4:7], v4 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xe6,0xd3,0x02,0x09,0x12,0x0c] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_f32_16x16x32_bf16 a[10:13], v[2:3], a[4:7], v5 -// GFX940: v_smfmac_f32_16x16x32_bf16 a[10:13], v[2:3], a[4:7], v5 ; encoding: [0x0a,0x80,0xe6,0xd3,0x02,0x09,0x16,0x14] +// GFX942: v_smfmac_f32_16x16x32_bf16 a[10:13], v[2:3], a[4:7], v5 ; encoding: [0x0a,0x80,0xe6,0xd3,0x02,0x09,0x16,0x14] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_f32_32x32x16_bf16 v[10:25], a[2:3], v[4:7], v6 cbsz:3 abid:1 -// GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], a[2:3], v[4:7], v6 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xe8,0xd3,0x02,0x09,0x1a,0x0c] +// GFX942: v_smfmac_f32_32x32x16_bf16 v[10:25], a[2:3], v[4:7], v6 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xe8,0xd3,0x02,0x09,0x1a,0x0c] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_f32_32x32x16_bf16 a[10:25], v[2:3], a[4:7], v7 -// GFX940: v_smfmac_f32_32x32x16_bf16 a[10:25], v[2:3], a[4:7], v7 ; encoding: [0x0a,0x80,0xe8,0xd3,0x02,0x09,0x1e,0x14] +// GFX942: v_smfmac_f32_32x32x16_bf16 a[10:25], v[2:3], a[4:7], v7 ; encoding: [0x0a,0x80,0xe8,0xd3,0x02,0x09,0x1e,0x14] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_i32_16x16x64_i8 v[10:13], a[2:3], v[4:7], v8 cbsz:3 abid:1 -// GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], a[2:3], v[4:7], v8 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xea,0xd3,0x02,0x09,0x22,0x0c] +// GFX942: v_smfmac_i32_16x16x64_i8 v[10:13], a[2:3], v[4:7], v8 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xea,0xd3,0x02,0x09,0x22,0x0c] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_i32_16x16x64_i8 a[10:13], v[2:3], a[4:7], v9 -// GFX940: v_smfmac_i32_16x16x64_i8 a[10:13], v[2:3], a[4:7], v9 ; encoding: [0x0a,0x80,0xea,0xd3,0x02,0x09,0x26,0x14] +// GFX942: v_smfmac_i32_16x16x64_i8 a[10:13], v[2:3], a[4:7], v9 ; encoding: [0x0a,0x80,0xea,0xd3,0x02,0x09,0x26,0x14] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_i32_32x32x32_i8 v[10:25], a[2:3], v[4:7], v10 cbsz:3 abid:1 -// GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], a[2:3], v[4:7], v10 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xec,0xd3,0x02,0x09,0x2a,0x0c] +// GFX942: v_smfmac_i32_32x32x32_i8 v[10:25], a[2:3], v[4:7], v10 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xec,0xd3,0x02,0x09,0x2a,0x0c] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_i32_32x32x32_i8 a[10:25], v[2:3], a[4:7], v11 -// GFX940: v_smfmac_i32_32x32x32_i8 a[10:25], v[2:3], a[4:7], v11 ; encoding: [0x0a,0x80,0xec,0xd3,0x02,0x09,0x2e,0x14] +// GFX942: v_smfmac_i32_32x32x32_i8 a[10:25], v[2:3], a[4:7], v11 ; encoding: [0x0a,0x80,0xec,0xd3,0x02,0x09,0x2e,0x14] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_f32_16x16x64_bf8_bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 -// GFX940: v_smfmac_f32_16x16x64_bf8_bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xf8,0xd3,0x02,0x09,0x06,0x0c] +// GFX942: v_smfmac_f32_16x16x64_bf8_bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xf8,0xd3,0x02,0x09,0x06,0x0c] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_f32_16x16x64_bf8_bf8 a[0:3], v[2:3], a[4:7], v1 -// GFX940: v_smfmac_f32_16x16x64_bf8_bf8 a[0:3], v[2:3], a[4:7], v1 ; encoding: [0x00,0x80,0xf8,0xd3,0x02,0x09,0x06,0x14] +// GFX942: v_smfmac_f32_16x16x64_bf8_bf8 a[0:3], v[2:3], a[4:7], v1 ; encoding: [0x00,0x80,0xf8,0xd3,0x02,0x09,0x06,0x14] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_f32_16x16x64_bf8_fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 -// GFX940: v_smfmac_f32_16x16x64_bf8_fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xf9,0xd3,0x02,0x09,0x06,0x0c] +// GFX942: v_smfmac_f32_16x16x64_bf8_fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xf9,0xd3,0x02,0x09,0x06,0x0c] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_f32_16x16x64_bf8_fp8 a[0:3], v[2:3], a[4:7], v1 -// GFX940: v_smfmac_f32_16x16x64_bf8_fp8 a[0:3], v[2:3], a[4:7], v1 ; encoding: [0x00,0x80,0xf9,0xd3,0x02,0x09,0x06,0x14] +// GFX942: v_smfmac_f32_16x16x64_bf8_fp8 a[0:3], v[2:3], a[4:7], v1 ; encoding: [0x00,0x80,0xf9,0xd3,0x02,0x09,0x06,0x14] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_f32_16x16x64_fp8_bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 -// GFX940: v_smfmac_f32_16x16x64_fp8_bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xfa,0xd3,0x02,0x09,0x06,0x0c] +// GFX942: v_smfmac_f32_16x16x64_fp8_bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xfa,0xd3,0x02,0x09,0x06,0x0c] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_f32_16x16x64_fp8_bf8 a[0:3], v[2:3], a[4:7], v1 -// GFX940: v_smfmac_f32_16x16x64_fp8_bf8 a[0:3], v[2:3], a[4:7], v1 ; encoding: [0x00,0x80,0xfa,0xd3,0x02,0x09,0x06,0x14] +// GFX942: v_smfmac_f32_16x16x64_fp8_bf8 a[0:3], v[2:3], a[4:7], v1 ; encoding: [0x00,0x80,0xfa,0xd3,0x02,0x09,0x06,0x14] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_f32_16x16x64_fp8_fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 -// GFX940: v_smfmac_f32_16x16x64_fp8_fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xfb,0xd3,0x02,0x09,0x06,0x0c] +// GFX942: v_smfmac_f32_16x16x64_fp8_fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xfb,0xd3,0x02,0x09,0x06,0x0c] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_f32_16x16x64_fp8_fp8 a[0:3], v[2:3], a[4:7], v1 -// GFX940: v_smfmac_f32_16x16x64_fp8_fp8 a[0:3], v[2:3], a[4:7], v1 ; encoding: [0x00,0x80,0xfb,0xd3,0x02,0x09,0x06,0x14] +// GFX942: v_smfmac_f32_16x16x64_fp8_fp8 a[0:3], v[2:3], a[4:7], v1 ; encoding: [0x00,0x80,0xfb,0xd3,0x02,0x09,0x06,0x14] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_f32_32x32x32_bf8_bf8 v[0:15], a[2:3], v[4:7], v1 cbsz:3 abid:1 -// GFX940: v_smfmac_f32_32x32x32_bf8_bf8 v[0:15], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xfc,0xd3,0x02,0x09,0x06,0x0c] +// GFX942: v_smfmac_f32_32x32x32_bf8_bf8 v[0:15], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xfc,0xd3,0x02,0x09,0x06,0x0c] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_f32_32x32x32_bf8_bf8 a[0:15], v[2:3], a[4:7], v1 -// GFX940: v_smfmac_f32_32x32x32_bf8_bf8 a[0:15], v[2:3], a[4:7], v1 ; encoding: [0x00,0x80,0xfc,0xd3,0x02,0x09,0x06,0x14] +// GFX942: v_smfmac_f32_32x32x32_bf8_bf8 a[0:15], v[2:3], a[4:7], v1 ; encoding: [0x00,0x80,0xfc,0xd3,0x02,0x09,0x06,0x14] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_f32_32x32x32_bf8_fp8 v[0:15], a[2:3], v[4:7], v1 cbsz:3 abid:1 -// GFX940: v_smfmac_f32_32x32x32_bf8_fp8 v[0:15], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xfd,0xd3,0x02,0x09,0x06,0x0c] +// GFX942: v_smfmac_f32_32x32x32_bf8_fp8 v[0:15], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xfd,0xd3,0x02,0x09,0x06,0x0c] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_f32_32x32x32_bf8_fp8 a[0:15], v[2:3], a[4:7], v1 -// GFX940: v_smfmac_f32_32x32x32_bf8_fp8 a[0:15], v[2:3], a[4:7], v1 ; encoding: [0x00,0x80,0xfd,0xd3,0x02,0x09,0x06,0x14] +// GFX942: v_smfmac_f32_32x32x32_bf8_fp8 a[0:15], v[2:3], a[4:7], v1 ; encoding: [0x00,0x80,0xfd,0xd3,0x02,0x09,0x06,0x14] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_f32_32x32x32_fp8_bf8 v[0:15], a[2:3], v[4:7], v1 cbsz:3 abid:1 -// GFX940: v_smfmac_f32_32x32x32_fp8_bf8 v[0:15], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xfe,0xd3,0x02,0x09,0x06,0x0c] +// GFX942: v_smfmac_f32_32x32x32_fp8_bf8 v[0:15], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xfe,0xd3,0x02,0x09,0x06,0x0c] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_f32_32x32x32_fp8_bf8 a[0:15], v[2:3], a[4:7], v1 -// GFX940: v_smfmac_f32_32x32x32_fp8_bf8 a[0:15], v[2:3], a[4:7], v1 ; encoding: [0x00,0x80,0xfe,0xd3,0x02,0x09,0x06,0x14] +// GFX942: v_smfmac_f32_32x32x32_fp8_bf8 a[0:15], v[2:3], a[4:7], v1 ; encoding: [0x00,0x80,0xfe,0xd3,0x02,0x09,0x06,0x14] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_f32_32x32x32_fp8_fp8 v[0:15], a[2:3], v[4:7], v1 cbsz:3 abid:1 -// GFX940: v_smfmac_f32_32x32x32_fp8_fp8 v[0:15], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xff,0xd3,0x02,0x09,0x06,0x0c] +// GFX942: v_smfmac_f32_32x32x32_fp8_fp8 v[0:15], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xff,0xd3,0x02,0x09,0x06,0x0c] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_f32_32x32x32_fp8_fp8 a[0:15], v[2:3], a[4:7], v1 -// GFX940: v_smfmac_f32_32x32x32_fp8_fp8 a[0:15], v[2:3], a[4:7], v1 ; encoding: [0x00,0x80,0xff,0xd3,0x02,0x09,0x06,0x14] +// GFX942: v_smfmac_f32_32x32x32_fp8_fp8 a[0:15], v[2:3], a[4:7], v1 ; encoding: [0x00,0x80,0xff,0xd3,0x02,0x09,0x06,0x14] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU //===----------------------------------------------------------------------===// @@ -685,57 +685,57 @@ v_smfmac_f32_32x32x32_fp8_fp8 a[0:15], v[2:3], a[4:7], v1 //===----------------------------------------------------------------------===// v_smfmac_f32_16x16x32f16 v[10:13], a[2:3], v[4:7], v0 cbsz:3 abid:1 -// GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], a[2:3], v[4:7], v0 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xe2,0xd3,0x02,0x09,0x02,0x0c] +// GFX942: v_smfmac_f32_16x16x32_f16 v[10:13], a[2:3], v[4:7], v0 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xe2,0xd3,0x02,0x09,0x02,0x0c] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_f32_32x32x16f16 v[10:25], a[2:3], v[4:7], v2 cbsz:3 abid:1 -// GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], a[2:3], v[4:7], v2 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xe4,0xd3,0x02,0x09,0x0a,0x0c] +// GFX942: v_smfmac_f32_32x32x16_f16 v[10:25], a[2:3], v[4:7], v2 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xe4,0xd3,0x02,0x09,0x0a,0x0c] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_f32_16x16x32bf16 v[10:13], a[2:3], v[4:7], v4 cbsz:3 abid:1 -// GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], a[2:3], v[4:7], v4 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xe6,0xd3,0x02,0x09,0x12,0x0c] +// GFX942: v_smfmac_f32_16x16x32_bf16 v[10:13], a[2:3], v[4:7], v4 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xe6,0xd3,0x02,0x09,0x12,0x0c] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_f32_32x32x16bf16 v[10:25], a[2:3], v[4:7], v6 cbsz:3 abid:1 -// GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], a[2:3], v[4:7], v6 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xe8,0xd3,0x02,0x09,0x1a,0x0c] +// GFX942: v_smfmac_f32_32x32x16_bf16 v[10:25], a[2:3], v[4:7], v6 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xe8,0xd3,0x02,0x09,0x1a,0x0c] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_i32_16x16x64i8 v[10:13], a[2:3], v[4:7], v8 cbsz:3 abid:1 -// GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], a[2:3], v[4:7], v8 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xea,0xd3,0x02,0x09,0x22,0x0c] +// GFX942: v_smfmac_i32_16x16x64_i8 v[10:13], a[2:3], v[4:7], v8 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xea,0xd3,0x02,0x09,0x22,0x0c] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_i32_32x32x32i8 a[10:25], v[2:3], a[4:7], v11 -// GFX940: v_smfmac_i32_32x32x32_i8 a[10:25], v[2:3], a[4:7], v11 ; encoding: [0x0a,0x80,0xec,0xd3,0x02,0x09,0x2e,0x14] +// GFX942: v_smfmac_i32_32x32x32_i8 a[10:25], v[2:3], a[4:7], v11 ; encoding: [0x0a,0x80,0xec,0xd3,0x02,0x09,0x2e,0x14] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_f32_16x16x64bf8bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 -// GFX940: v_smfmac_f32_16x16x64_bf8_bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xf8,0xd3,0x02,0x09,0x06,0x0c] +// GFX942: v_smfmac_f32_16x16x64_bf8_bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xf8,0xd3,0x02,0x09,0x06,0x0c] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_f32_16x16x64bf8fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 -// GFX940: v_smfmac_f32_16x16x64_bf8_fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xf9,0xd3,0x02,0x09,0x06,0x0c] +// GFX942: v_smfmac_f32_16x16x64_bf8_fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xf9,0xd3,0x02,0x09,0x06,0x0c] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_f32_16x16x64fp8bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 -// GFX940: v_smfmac_f32_16x16x64_fp8_bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xfa,0xd3,0x02,0x09,0x06,0x0c] +// GFX942: v_smfmac_f32_16x16x64_fp8_bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xfa,0xd3,0x02,0x09,0x06,0x0c] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_f32_16x16x64fp8fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 -// GFX940: v_smfmac_f32_16x16x64_fp8_fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xfb,0xd3,0x02,0x09,0x06,0x0c] +// GFX942: v_smfmac_f32_16x16x64_fp8_fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xfb,0xd3,0x02,0x09,0x06,0x0c] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_f32_32x32x32bf8bf8 v[0:15], a[2:3], v[4:7], v1 cbsz:3 abid:1 -// GFX940: v_smfmac_f32_32x32x32_bf8_bf8 v[0:15], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xfc,0xd3,0x02,0x09,0x06,0x0c] +// GFX942: v_smfmac_f32_32x32x32_bf8_bf8 v[0:15], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xfc,0xd3,0x02,0x09,0x06,0x0c] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_f32_32x32x32bf8fp8 v[0:15], a[2:3], v[4:7], v1 cbsz:3 abid:1 -// GFX940: v_smfmac_f32_32x32x32_bf8_fp8 v[0:15], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xfd,0xd3,0x02,0x09,0x06,0x0c] +// GFX942: v_smfmac_f32_32x32x32_bf8_fp8 v[0:15], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xfd,0xd3,0x02,0x09,0x06,0x0c] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_f32_32x32x32fp8bf8 v[0:15], a[2:3], v[4:7], v1 cbsz:3 abid:1 -// GFX940: v_smfmac_f32_32x32x32_fp8_bf8 v[0:15], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xfe,0xd3,0x02,0x09,0x06,0x0c] +// GFX942: v_smfmac_f32_32x32x32_fp8_bf8 v[0:15], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xfe,0xd3,0x02,0x09,0x06,0x0c] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU v_smfmac_f32_32x32x32fp8fp8 v[0:15], a[2:3], v[4:7], v1 cbsz:3 abid:1 -// GFX940: v_smfmac_f32_32x32x32_fp8_fp8 v[0:15], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xff,0xd3,0x02,0x09,0x06,0x0c] +// GFX942: v_smfmac_f32_32x32x32_fp8_fp8 v[0:15], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xff,0xd3,0x02,0x09,0x06,0x0c] // GFX90A: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/mai-gfx950.s b/llvm/test/MC/AMDGPU/mai-gfx950.s index 23b1ba2c3cd13..dd090cb73e56d 100644 --- a/llvm/test/MC/AMDGPU/mai-gfx950.s +++ b/llvm/test/MC/AMDGPU/mai-gfx950.s @@ -1,5 +1,5 @@ // RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck -check-prefix=GFX950 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck -check-prefix=ERR %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx942 %s 2>&1 | FileCheck -check-prefix=ERR %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx90a %s 2>&1 | FileCheck -check-prefix=ERR %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck -check-prefix=ERR %s diff --git a/llvm/test/MC/AMDGPU/mimg-err-gfx940.s b/llvm/test/MC/AMDGPU/mimg-err-gfx942.s similarity index 51% rename from llvm/test/MC/AMDGPU/mimg-err-gfx940.s rename to llvm/test/MC/AMDGPU/mimg-err-gfx942.s index 5d28927456332..9861d577023e2 100644 --- a/llvm/test/MC/AMDGPU/mimg-err-gfx940.s +++ b/llvm/test/MC/AMDGPU/mimg-err-gfx942.s @@ -1,79 +1,79 @@ -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck %s --check-prefix=NOGFX940 --implicit-check-not=error: +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx942 %s 2>&1 | FileCheck %s --check-prefix=NOGFX942 --implicit-check-not=error: image_load v[4:6], v[238:241], s[28:35] dmask:0x7 unorm -// NOGFX940: :[[@LINE-1]]:{{[0-9]+}}: error: +// NOGFX942: :[[@LINE-1]]:{{[0-9]+}}: error: image_load_pck v5, v[0:3], s[8:15] dmask:0x1 glc -// NOGFX940: :[[@LINE-1]]:{{[0-9]+}}: error: +// NOGFX942: :[[@LINE-1]]:{{[0-9]+}}: error: image_load_pck_sgn v5, v[0:3], s[8:15] dmask:0x1 lwe -// NOGFX940: :[[@LINE-1]]:{{[0-9]+}}: error: +// NOGFX942: :[[@LINE-1]]:{{[0-9]+}}: error: image_load_mip v5, v[0:3], s[8:15] -// NOGFX940: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU +// NOGFX942: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU image_load_mip_pck v5, v1, s[8:15] dmask:0x1 -// NOGFX940: :[[@LINE-1]]:{{[0-9]+}}: error: +// NOGFX942: :[[@LINE-1]]:{{[0-9]+}}: error: image_load_mip_pck_sgn v[4:5], v[0:3], s[8:15] dmask:0x5 -// NOGFX940: :[[@LINE-1]]:{{[0-9]+}}: error: +// NOGFX942: :[[@LINE-1]]:{{[0-9]+}}: error: image_store v[192:194], v[238:241], s[28:35] dmask:0x7 unorm -// NOGFX940: :[[@LINE-1]]:{{[0-9]+}}: error: +// NOGFX942: :[[@LINE-1]]:{{[0-9]+}}: error: image_store_pck v1, v[2:5], s[12:19] dmask:0x1 unorm da -// NOGFX940: :[[@LINE-1]]:{{[0-9]+}}: error: +// NOGFX942: :[[@LINE-1]]:{{[0-9]+}}: error: image_store_mip v1, v[2:5], s[12:19] -// NOGFX940: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU +// NOGFX942: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU image_store_mip_pck v252, v[2:3], s[12:19] dmask:0x1 a16 -// NOGFX940: :[[@LINE-1]]:{{[0-9]+}}: error: +// NOGFX942: :[[@LINE-1]]:{{[0-9]+}}: error: image_atomic_add v4, v192, s[28:35] dmask:0x1 unorm glc -// NOGFX940: :[[@LINE-1]]:{{[0-9]+}}: error: +// NOGFX942: :[[@LINE-1]]:{{[0-9]+}}: error: image_atomic_and v4, v192, s[28:35] dmask:0x1 unorm -// NOGFX940: :[[@LINE-1]]:{{[0-9]+}}: error: +// NOGFX942: :[[@LINE-1]]:{{[0-9]+}}: error: image_atomic_swap v4, v[192:195], s[28:35] dmask:0x1 unorm glc -// NOGFX940: :[[@LINE-1]]:{{[0-9]+}}: error: +// NOGFX942: :[[@LINE-1]]:{{[0-9]+}}: error: image_atomic_cmpswap v[4:5], v[192:195], s[28:35] dmask:0x3 unorm glc -// NOGFX940: :[[@LINE-1]]:{{[0-9]+}}: error: +// NOGFX942: :[[@LINE-1]]:{{[0-9]+}}: error: image_atomic_or v4, v192, s[28:35] dmask:0x1 unorm -// NOGFX940: :[[@LINE-1]]:{{[0-9]+}}: error: +// NOGFX942: :[[@LINE-1]]:{{[0-9]+}}: error: image_atomic_xor v4, v192, s[28:35] dmask:0x1 unorm -// NOGFX940: :[[@LINE-1]]:{{[0-9]+}}: error: +// NOGFX942: :[[@LINE-1]]:{{[0-9]+}}: error: image_atomic_sub v4, v192, s[28:35] dmask:0x1 unorm -// NOGFX940: :[[@LINE-1]]:{{[0-9]+}}: error: +// NOGFX942: :[[@LINE-1]]:{{[0-9]+}}: error: image_atomic_smin v4, v192, s[28:35] dmask:0x1 unorm -// NOGFX940: :[[@LINE-1]]:{{[0-9]+}}: error: +// NOGFX942: :[[@LINE-1]]:{{[0-9]+}}: error: image_atomic_smax v4, v192, s[28:35] dmask:0x1 unorm -// NOGFX940: :[[@LINE-1]]:{{[0-9]+}}: error: +// NOGFX942: :[[@LINE-1]]:{{[0-9]+}}: error: image_atomic_umin v4, v192, s[28:35] dmask:0x1 unorm -// NOGFX940: :[[@LINE-1]]:{{[0-9]+}}: error: +// NOGFX942: :[[@LINE-1]]:{{[0-9]+}}: error: image_atomic_umax v4, v192, s[28:35] dmask:0x1 unorm -// NOGFX940: :[[@LINE-1]]:{{[0-9]+}}: error: +// NOGFX942: :[[@LINE-1]]:{{[0-9]+}}: error: image_atomic_inc v4, v192, s[28:35] dmask:0x1 unorm -// NOGFX940: :[[@LINE-1]]:{{[0-9]+}}: error: +// NOGFX942: :[[@LINE-1]]:{{[0-9]+}}: error: image_atomic_dec v4, v192, s[28:35] dmask:0x1 unorm -// NOGFX940: :[[@LINE-1]]:{{[0-9]+}}: error: +// NOGFX942: :[[@LINE-1]]:{{[0-9]+}}: error: image_get_resinfo v5, v1, s[8:15] dmask:0x1 -// NOGFX940: :[[@LINE-1]]:{{[0-9]+}}: error: +// NOGFX942: :[[@LINE-1]]:{{[0-9]+}}: error: image_sample v5, v[0:3], s[8:15], s[12:15] dmask:0x1 -// NOGFX940: :[[@LINE-1]]:{{[0-9]+}}: error: +// NOGFX942: :[[@LINE-1]]:{{[0-9]+}}: error: image_gather4 v[5:8], v[1:4], s[8:15], s[12:15] dmask:0x2 -// NOGFX940: :[[@LINE-1]]:{{[0-9]+}}: error: +// NOGFX942: :[[@LINE-1]]:{{[0-9]+}}: error: diff --git a/llvm/test/MC/AMDGPU/mubuf-gfx950.s b/llvm/test/MC/AMDGPU/mubuf-gfx950.s index 0ba6f2ca4f6c4..fff4fc7df5241 100644 --- a/llvm/test/MC/AMDGPU/mubuf-gfx950.s +++ b/llvm/test/MC/AMDGPU/mubuf-gfx950.s @@ -1,5 +1,5 @@ // RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck -check-prefix=GFX950 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx940 -show-encoding %s 2>&1 | FileCheck -check-prefix=ERR %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx942 -show-encoding %s 2>&1 | FileCheck -check-prefix=ERR %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx803 -show-encoding %s 2>&1 | FileCheck -check-prefix=ERR %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1030 -show-encoding %s 2>&1 | FileCheck -check-prefix=ERR %s diff --git a/llvm/test/MC/AMDGPU/writelane_m0.s b/llvm/test/MC/AMDGPU/writelane_m0.s index d12ae7eec67c9..d3cfc7d2ae00c 100644 --- a/llvm/test/MC/AMDGPU/writelane_m0.s +++ b/llvm/test/MC/AMDGPU/writelane_m0.s @@ -1,7 +1,7 @@ // RUN: llvm-mc --triple=amdgcn --mcpu=gfx600 -show-encoding %s | FileCheck %s -check-prefix=GFX6 // RUN: llvm-mc --triple=amdgcn --mcpu=gfx700 -show-encoding %s | FileCheck %s -check-prefix=GFX7 // RUN: llvm-mc --triple=amdgcn --mcpu=gfx904 -show-encoding %s | FileCheck %s -check-prefix=GFX9 -// RUN: llvm-mc --triple=amdgcn --mcpu=gfx940 -show-encoding %s | FileCheck %s -check-prefix=GFX9 +// RUN: llvm-mc --triple=amdgcn --mcpu=gfx942 -show-encoding %s | FileCheck %s -check-prefix=GFX9 // RUN: llvm-mc --triple=amdgcn --mcpu=gfx1010 -show-encoding %s | FileCheck %s -check-prefix=GFX10 // RUN: llvm-mc --triple=amdgcn --mcpu=gfx1030 -show-encoding %s | FileCheck %s -check-prefix=GFX10 // RUN: llvm-mc --triple=amdgcn --mcpu=gfx1100 -show-encoding %s | FileCheck %s -check-prefix=GFX11 diff --git a/llvm/test/MC/AMDGPU/xdl-insts-gfx908.s b/llvm/test/MC/AMDGPU/xdl-insts-gfx908.s index be2041454b10c..006a71eeff56e 100644 --- a/llvm/test/MC/AMDGPU/xdl-insts-gfx908.s +++ b/llvm/test/MC/AMDGPU/xdl-insts-gfx908.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=amdgcn -mcpu=gfx908 -show-encoding %s | FileCheck %s // RUN: llvm-mc -triple=amdgcn -mcpu=gfx90a -show-encoding %s | FileCheck %s -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx940 -show-encoding %s | FileCheck %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx942 -show-encoding %s | FileCheck %s // CHECK: encoding: [0x01,0x05,0x0a,0x6e] v_dot2c_f32_f16 v5, v1, v2 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx908-xdl-insts.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx908-xdl-insts.txt index be23058ff424d..12cacbad3e3a8 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx908-xdl-insts.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx908-xdl-insts.txt @@ -1,6 +1,6 @@ # RUN: llvm-mc -triple=amdgcn -mcpu=gfx908 -disassemble -show-encoding < %s | FileCheck %s # RUN: llvm-mc -triple=amdgcn -mcpu=gfx90a -disassemble -show-encoding < %s | FileCheck %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx940 -disassemble -show-encoding < %s | FileCheck %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx942 -disassemble -show-encoding < %s | FileCheck %s # CHECK: v_dot2c_f32_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x6e] 0x01,0x05,0x0a,0x6e diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx940_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx942_features.txt similarity index 54% rename from llvm/test/MC/Disassembler/AMDGPU/gfx940_features.txt rename to llvm/test/MC/Disassembler/AMDGPU/gfx942_features.txt index 63e425fdb4ec9..c64489e8a49a7 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx940_features.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx942_features.txt @@ -1,545 +1,545 @@ -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx940 -disassemble -show-encoding %s | FileCheck -strict-whitespace --check-prefix=GFX940 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -disassemble -show-encoding %s | FileCheck -strict-whitespace --check-prefix=GFX940 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx942 -disassemble -show-encoding %s | FileCheck -strict-whitespace --check-prefix=GFX942 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -disassemble -show-encoding %s | FileCheck -strict-whitespace --check-prefix=GFX942 %s -# GFX940: global_load_dword v2, v[2:3], off sc0 ; encoding: [0x00,0x80,0x51,0xdc,0x02,0x00,0x7f,0x02] +# GFX942: global_load_dword v2, v[2:3], off sc0 ; encoding: [0x00,0x80,0x51,0xdc,0x02,0x00,0x7f,0x02] 0x00,0x80,0x51,0xdc,0x02,0x00,0x7f,0x02 -# GFX940: global_load_dword v2, v[2:3], off sc1 ; encoding: [0x00,0x80,0x50,0xde,0x02,0x00,0x7f,0x02] +# GFX942: global_load_dword v2, v[2:3], off sc1 ; encoding: [0x00,0x80,0x50,0xde,0x02,0x00,0x7f,0x02] 0x00,0x80,0x50,0xde,0x02,0x00,0x7f,0x02 -# GFX940: global_load_dword v2, v[2:3], off nt ; encoding: [0x00,0x80,0x52,0xdc,0x02,0x00,0x7f,0x02] +# GFX942: global_load_dword v2, v[2:3], off nt ; encoding: [0x00,0x80,0x52,0xdc,0x02,0x00,0x7f,0x02] 0x00,0x80,0x52,0xdc,0x02,0x00,0x7f,0x02 -# GFX940: s_load_dword s2, s[2:3], 0x0 glc ; encoding: [0x81,0x00,0x03,0xc0,0x00,0x00,0x00,0x00] +# GFX942: s_load_dword s2, s[2:3], 0x0 glc ; encoding: [0x81,0x00,0x03,0xc0,0x00,0x00,0x00,0x00] 0x81,0x00,0x03,0xc0,0x00,0x00,0x00,0x00 -# GFX940: buffer_load_dword v5, off, s[8:11], s3 sc0 nt sc1 ; encoding: [0x00,0xc0,0x52,0xe0,0x00,0x05,0x02,0x03] +# GFX942: buffer_load_dword v5, off, s[8:11], s3 sc0 nt sc1 ; encoding: [0x00,0xc0,0x52,0xe0,0x00,0x05,0x02,0x03] 0x00,0xc0,0x52,0xe0,0x00,0x05,0x02,0x03 -# GFX940: flat_atomic_add_f32 v[2:3], v1 ; encoding: [0x00,0x00,0x34,0xdd,0x02,0x01,0x00,0x00] +# GFX942: flat_atomic_add_f32 v[2:3], v1 ; encoding: [0x00,0x00,0x34,0xdd,0x02,0x01,0x00,0x00] 0x00,0x00,0x34,0xdd,0x02,0x01,0x00,0x00 -# GFX940: flat_atomic_add_f32 v[2:3], a1 ; encoding: [0x00,0x00,0x34,0xdd,0x02,0x01,0x80,0x00] +# GFX942: flat_atomic_add_f32 v[2:3], a1 ; encoding: [0x00,0x00,0x34,0xdd,0x02,0x01,0x80,0x00] 0x00,0x00,0x34,0xdd,0x02,0x01,0x80,0x00 -# GFX940: flat_atomic_add_f32 v4, v[2:3], v1 sc0 ; encoding: [0x00,0x00,0x35,0xdd,0x02,0x01,0x00,0x04] +# GFX942: flat_atomic_add_f32 v4, v[2:3], v1 sc0 ; encoding: [0x00,0x00,0x35,0xdd,0x02,0x01,0x00,0x04] 0x00,0x00,0x35,0xdd,0x02,0x01,0x00,0x04 -# GFX940: flat_atomic_add_f32 a4, v[2:3], a1 sc0 ; encoding: [0x00,0x00,0x35,0xdd,0x02,0x01,0x80,0x04] +# GFX942: flat_atomic_add_f32 a4, v[2:3], a1 sc0 ; encoding: [0x00,0x00,0x35,0xdd,0x02,0x01,0x80,0x04] 0x00,0x00,0x35,0xdd,0x02,0x01,0x80,0x04 -# GFX940: flat_atomic_pk_add_f16 v4, v[2:3], v1 sc0 ; encoding: [0x00,0x00,0x39,0xdd,0x02,0x01,0x00,0x04] +# GFX942: flat_atomic_pk_add_f16 v4, v[2:3], v1 sc0 ; encoding: [0x00,0x00,0x39,0xdd,0x02,0x01,0x00,0x04] 0x00,0x00,0x39,0xdd,0x02,0x01,0x00,0x04 -# GFX940: flat_atomic_pk_add_f16 a4, v[2:3], a1 sc0 ; encoding: [0x00,0x00,0x39,0xdd,0x02,0x01,0x80,0x04] +# GFX942: flat_atomic_pk_add_f16 a4, v[2:3], a1 sc0 ; encoding: [0x00,0x00,0x39,0xdd,0x02,0x01,0x80,0x04] 0x00,0x00,0x39,0xdd,0x02,0x01,0x80,0x04 -# GFX940: flat_atomic_pk_add_f16 v[2:3], v1 ; encoding: [0x00,0x00,0x38,0xdd,0x02,0x01,0x00,0x00] +# GFX942: flat_atomic_pk_add_f16 v[2:3], v1 ; encoding: [0x00,0x00,0x38,0xdd,0x02,0x01,0x00,0x00] 0x00,0x00,0x38,0xdd,0x02,0x01,0x00,0x00 -# GFX940: flat_atomic_pk_add_f16 v[2:3], a1 ; encoding: [0x00,0x00,0x38,0xdd,0x02,0x01,0x80,0x00] +# GFX942: flat_atomic_pk_add_f16 v[2:3], a1 ; encoding: [0x00,0x00,0x38,0xdd,0x02,0x01,0x80,0x00] 0x00,0x00,0x38,0xdd,0x02,0x01,0x80,0x00 -# GFX940: flat_atomic_pk_add_bf16 v4, v[2:3], v1 sc0 ; encoding: [0x00,0x00,0x49,0xdd,0x02,0x01,0x00,0x04] +# GFX942: flat_atomic_pk_add_bf16 v4, v[2:3], v1 sc0 ; encoding: [0x00,0x00,0x49,0xdd,0x02,0x01,0x00,0x04] 0x00,0x00,0x49,0xdd,0x02,0x01,0x00,0x04 -# GFX940: flat_atomic_pk_add_bf16 a4, v[2:3], a1 sc0 ; encoding: [0x00,0x00,0x49,0xdd,0x02,0x01,0x80,0x04] +# GFX942: flat_atomic_pk_add_bf16 a4, v[2:3], a1 sc0 ; encoding: [0x00,0x00,0x49,0xdd,0x02,0x01,0x80,0x04] 0x00,0x00,0x49,0xdd,0x02,0x01,0x80,0x04 -# GFX940: flat_atomic_pk_add_bf16 v[2:3], v1 ; encoding: [0x00,0x00,0x48,0xdd,0x02,0x01,0x00,0x00] +# GFX942: flat_atomic_pk_add_bf16 v[2:3], v1 ; encoding: [0x00,0x00,0x48,0xdd,0x02,0x01,0x00,0x00] 0x00,0x00,0x48,0xdd,0x02,0x01,0x00,0x00 -# GFX940: flat_atomic_pk_add_bf16 v[2:3], a1 ; encoding: [0x00,0x00,0x48,0xdd,0x02,0x01,0x80,0x00] +# GFX942: flat_atomic_pk_add_bf16 v[2:3], a1 ; encoding: [0x00,0x00,0x48,0xdd,0x02,0x01,0x80,0x00] 0x00,0x00,0x48,0xdd,0x02,0x01,0x80,0x00 -# GFX940: global_atomic_pk_add_bf16 v4, v[2:3], v1, off sc0 ; encoding: [0x00,0x80,0x49,0xdd,0x02,0x01,0x7f,0x04] +# GFX942: global_atomic_pk_add_bf16 v4, v[2:3], v1, off sc0 ; encoding: [0x00,0x80,0x49,0xdd,0x02,0x01,0x7f,0x04] 0x00,0x80,0x49,0xdd,0x02,0x01,0x7f,0x04 -# GFX940: global_atomic_pk_add_bf16 a4, v[2:3], a1, off sc0 ; encoding: [0x00,0x80,0x49,0xdd,0x02,0x01,0xff,0x04] +# GFX942: global_atomic_pk_add_bf16 a4, v[2:3], a1, off sc0 ; encoding: [0x00,0x80,0x49,0xdd,0x02,0x01,0xff,0x04] 0x00,0x80,0x49,0xdd,0x02,0x01,0xff,0x04 -# GFX940: global_atomic_pk_add_bf16 v[2:3], v1, off ; encoding: [0x00,0x80,0x48,0xdd,0x02,0x01,0x7f,0x00] +# GFX942: global_atomic_pk_add_bf16 v[2:3], v1, off ; encoding: [0x00,0x80,0x48,0xdd,0x02,0x01,0x7f,0x00] 0x00,0x80,0x48,0xdd,0x02,0x01,0x7f,0x00 -# GFX940: global_atomic_pk_add_bf16 v[2:3], a1, off ; encoding: [0x00,0x80,0x48,0xdd,0x02,0x01,0xff,0x00] +# GFX942: global_atomic_pk_add_bf16 v[2:3], a1, off ; encoding: [0x00,0x80,0x48,0xdd,0x02,0x01,0xff,0x00] 0x00,0x80,0x48,0xdd,0x02,0x01,0xff,0x00 -# GFX940: ds_pk_add_f16 v2, v1 ; encoding: [0x00,0x00,0x2e,0xd8,0x02,0x01,0x00,0x00] +# GFX942: ds_pk_add_f16 v2, v1 ; encoding: [0x00,0x00,0x2e,0xd8,0x02,0x01,0x00,0x00] 0x00,0x00,0x2e,0xd8,0x02,0x01,0x00,0x00 -# GFX940: ds_pk_add_f16 v2, a1 ; encoding: [0x00,0x00,0x2e,0xda,0x02,0x01,0x00,0x00] +# GFX942: ds_pk_add_f16 v2, a1 ; encoding: [0x00,0x00,0x2e,0xda,0x02,0x01,0x00,0x00] 0x00,0x00,0x2e,0xda,0x02,0x01,0x00,0x00 -# GFX940: ds_pk_add_rtn_f16 v3, v2, v1 ; encoding: [0x00,0x00,0x6e,0xd9,0x02,0x01,0x00,0x03] +# GFX942: ds_pk_add_rtn_f16 v3, v2, v1 ; encoding: [0x00,0x00,0x6e,0xd9,0x02,0x01,0x00,0x03] 0x00,0x00,0x6e,0xd9,0x02,0x01,0x00,0x03 -# GFX940: ds_pk_add_rtn_f16 a3, v2, a1 ; encoding: [0x00,0x00,0x6e,0xdb,0x02,0x01,0x00,0x03] +# GFX942: ds_pk_add_rtn_f16 a3, v2, a1 ; encoding: [0x00,0x00,0x6e,0xdb,0x02,0x01,0x00,0x03] 0x00,0x00,0x6e,0xdb,0x02,0x01,0x00,0x03 -# GFX940: ds_pk_add_bf16 v2, v1 ; encoding: [0x00,0x00,0x30,0xd8,0x02,0x01,0x00,0x00] +# GFX942: ds_pk_add_bf16 v2, v1 ; encoding: [0x00,0x00,0x30,0xd8,0x02,0x01,0x00,0x00] 0x00,0x00,0x30,0xd8,0x02,0x01,0x00,0x00 -# GFX940: ds_pk_add_bf16 v2, a1 ; encoding: [0x00,0x00,0x30,0xda,0x02,0x01,0x00,0x00] +# GFX942: ds_pk_add_bf16 v2, a1 ; encoding: [0x00,0x00,0x30,0xda,0x02,0x01,0x00,0x00] 0x00,0x00,0x30,0xda,0x02,0x01,0x00,0x00 -# GFX940: ds_pk_add_rtn_bf16 v3, v2, v1 ; encoding: [0x00,0x00,0x70,0xd9,0x02,0x01,0x00,0x03] +# GFX942: ds_pk_add_rtn_bf16 v3, v2, v1 ; encoding: [0x00,0x00,0x70,0xd9,0x02,0x01,0x00,0x03] 0x00,0x00,0x70,0xd9,0x02,0x01,0x00,0x03 -# GFX940: ds_pk_add_rtn_bf16 a3, v2, a1 ; encoding: [0x00,0x00,0x70,0xdb,0x02,0x01,0x00,0x03] +# GFX942: ds_pk_add_rtn_bf16 a3, v2, a1 ; encoding: [0x00,0x00,0x70,0xdb,0x02,0x01,0x00,0x03] 0x00,0x00,0x70,0xdb,0x02,0x01,0x00,0x03 -# GFX940: global_load_lds_dword v[2:3], off ; encoding: [0x00,0x80,0xa8,0xdc,0x02,0x00,0x7f,0x00] +# GFX942: global_load_lds_dword v[2:3], off ; encoding: [0x00,0x80,0xa8,0xdc,0x02,0x00,0x7f,0x00] 0x00,0x80,0xa8,0xdc,0x02,0x00,0x7f,0x00 -# GFX940: global_load_lds_dword v[2:3], off sc0 nt sc1 ; encoding: [0x00,0x80,0xab,0xde,0x02,0x00,0x7f,0x00] +# GFX942: global_load_lds_dword v[2:3], off sc0 nt sc1 ; encoding: [0x00,0x80,0xab,0xde,0x02,0x00,0x7f,0x00] 0x00,0x80,0xab,0xde,0x02,0x00,0x7f,0x00 -# GFX940: global_load_lds_dword v[2:3], off offset:4 ; encoding: [0x04,0x80,0xa8,0xdc,0x02,0x00,0x7f,0x00] +# GFX942: global_load_lds_dword v[2:3], off offset:4 ; encoding: [0x04,0x80,0xa8,0xdc,0x02,0x00,0x7f,0x00] 0x04,0x80,0xa8,0xdc,0x02,0x00,0x7f,0x00 -# GFX940: global_load_lds_dword v2, s[4:5] offset:4 ; encoding: [0x04,0x80,0xa8,0xdc,0x02,0x00,0x04,0x00] +# GFX942: global_load_lds_dword v2, s[4:5] offset:4 ; encoding: [0x04,0x80,0xa8,0xdc,0x02,0x00,0x04,0x00] 0x04,0x80,0xa8,0xdc,0x02,0x00,0x04,0x00 -# GFX940: global_load_lds_ubyte v[2:3], off ; encoding: [0x00,0x80,0x98,0xdc,0x02,0x00,0x7f,0x00] +# GFX942: global_load_lds_ubyte v[2:3], off ; encoding: [0x00,0x80,0x98,0xdc,0x02,0x00,0x7f,0x00] 0x00,0x80,0x98,0xdc,0x02,0x00,0x7f,0x00 -# GFX940: global_load_lds_sbyte v[2:3], off ; encoding: [0x00,0x80,0x9c,0xdc,0x02,0x00,0x7f,0x00] +# GFX942: global_load_lds_sbyte v[2:3], off ; encoding: [0x00,0x80,0x9c,0xdc,0x02,0x00,0x7f,0x00] 0x00,0x80,0x9c,0xdc,0x02,0x00,0x7f,0x00 -# GFX940: global_load_lds_sshort v[2:3], off ; encoding: [0x00,0x80,0xa4,0xdc,0x02,0x00,0x7f,0x00] +# GFX942: global_load_lds_sshort v[2:3], off ; encoding: [0x00,0x80,0xa4,0xdc,0x02,0x00,0x7f,0x00] 0x00,0x80,0xa4,0xdc,0x02,0x00,0x7f,0x00 -# GFX940: global_load_lds_ushort v[2:3], off ; encoding: [0x00,0x80,0xa0,0xdc,0x02,0x00,0x7f,0x00] +# GFX942: global_load_lds_ushort v[2:3], off ; encoding: [0x00,0x80,0xa0,0xdc,0x02,0x00,0x7f,0x00] 0x00,0x80,0xa0,0xdc,0x02,0x00,0x7f,0x00 -# GFX940: scratch_load_lds_dword v2, off ; encoding: [0x00,0x60,0xa8,0xdc,0x02,0x00,0x7f,0x00] +# GFX942: scratch_load_lds_dword v2, off ; encoding: [0x00,0x60,0xa8,0xdc,0x02,0x00,0x7f,0x00] 0x00,0x60,0xa8,0xdc,0x02,0x00,0x7f,0x00 -# GFX940: scratch_load_lds_dword v2, s4 ; encoding: [0x00,0x60,0xa8,0xdc,0x02,0x00,0x04,0x00] +# GFX942: scratch_load_lds_dword v2, s4 ; encoding: [0x00,0x60,0xa8,0xdc,0x02,0x00,0x04,0x00] 0x00,0x60,0xa8,0xdc,0x02,0x00,0x04,0x00 -# GFX940: scratch_load_lds_dword v2, s4 offset:4 ; encoding: [0x04,0x60,0xa8,0xdc,0x02,0x00,0x04,0x00] +# GFX942: scratch_load_lds_dword v2, s4 offset:4 ; encoding: [0x04,0x60,0xa8,0xdc,0x02,0x00,0x04,0x00] 0x04,0x60,0xa8,0xdc,0x02,0x00,0x04,0x00 -# GFX940: scratch_load_lds_dword off, s4 offset:4 ; encoding: [0x04,0x40,0xa8,0xdc,0x00,0x00,0x04,0x00] +# GFX942: scratch_load_lds_dword off, s4 offset:4 ; encoding: [0x04,0x40,0xa8,0xdc,0x00,0x00,0x04,0x00] 0x04,0x40,0xa8,0xdc,0x00,0x00,0x04,0x00 -# GFX940: scratch_load_lds_dword off, off offset:4 ; encoding: [0x04,0x40,0xa8,0xdc,0x00,0x00,0x7f,0x00] +# GFX942: scratch_load_lds_dword off, off offset:4 ; encoding: [0x04,0x40,0xa8,0xdc,0x00,0x00,0x7f,0x00] 0x04,0x40,0xa8,0xdc,0x00,0x00,0x7f,0x00 -# GFX940: scratch_load_lds_dword off, off offset:4 ; encoding: [0x04,0x40,0xa8,0xdc,0x00,0x00,0x7f,0x00] +# GFX942: scratch_load_lds_dword off, off offset:4 ; encoding: [0x04,0x40,0xa8,0xdc,0x00,0x00,0x7f,0x00] 0x04,0x40,0xa8,0xdc,0x00,0x00,0x7f,0x00 -# GFX940: scratch_load_lds_ubyte v2, off ; encoding: [0x00,0x60,0x98,0xdc,0x02,0x00,0x7f,0x00] +# GFX942: scratch_load_lds_ubyte v2, off ; encoding: [0x00,0x60,0x98,0xdc,0x02,0x00,0x7f,0x00] 0x00,0x60,0x98,0xdc,0x02,0x00,0x7f,0x00 -# GFX940: scratch_load_lds_sbyte v2, off ; encoding: [0x00,0x60,0x9c,0xdc,0x02,0x00,0x7f,0x00] +# GFX942: scratch_load_lds_sbyte v2, off ; encoding: [0x00,0x60,0x9c,0xdc,0x02,0x00,0x7f,0x00] 0x00,0x60,0x9c,0xdc,0x02,0x00,0x7f,0x00 -# GFX940: scratch_load_lds_ushort v2, off ; encoding: [0x00,0x60,0xa0,0xdc,0x02,0x00,0x7f,0x00] +# GFX942: scratch_load_lds_ushort v2, off ; encoding: [0x00,0x60,0xa0,0xdc,0x02,0x00,0x7f,0x00] 0x00,0x60,0xa0,0xdc,0x02,0x00,0x7f,0x00 -# GFX940: scratch_load_lds_sshort v2, off ; encoding: [0x00,0x60,0xa4,0xdc,0x02,0x00,0x7f,0x00] +# GFX942: scratch_load_lds_sshort v2, off ; encoding: [0x00,0x60,0xa4,0xdc,0x02,0x00,0x7f,0x00] 0x00,0x60,0xa4,0xdc,0x02,0x00,0x7f,0x00 -# GFX940: s_getreg_b32 s1, hwreg(HW_REG_XCC_ID) ; encoding: [0x14,0xf8,0x81,0xb8] +# GFX942: s_getreg_b32 s1, hwreg(HW_REG_XCC_ID) ; encoding: [0x14,0xf8,0x81,0xb8] 0x14,0xf8,0x81,0xb8 -# GFX940: s_getreg_b32 s1, hwreg(HW_REG_SQ_PERF_SNAPSHOT_DATA) ; encoding: [0x15,0xf8,0x81,0xb8] +# GFX942: s_getreg_b32 s1, hwreg(HW_REG_SQ_PERF_SNAPSHOT_DATA) ; encoding: [0x15,0xf8,0x81,0xb8] 0x15,0xf8,0x81,0xb8 -# GFX940: s_getreg_b32 s1, hwreg(HW_REG_SQ_PERF_SNAPSHOT_DATA1) ; encoding: [0x16,0xf8,0x81,0xb8] +# GFX942: s_getreg_b32 s1, hwreg(HW_REG_SQ_PERF_SNAPSHOT_DATA1) ; encoding: [0x16,0xf8,0x81,0xb8] 0x16,0xf8,0x81,0xb8 -# GFX940: s_getreg_b32 s1, hwreg(HW_REG_SQ_PERF_SNAPSHOT_PC_LO) ; encoding: [0x17,0xf8,0x81,0xb8] +# GFX942: s_getreg_b32 s1, hwreg(HW_REG_SQ_PERF_SNAPSHOT_PC_LO) ; encoding: [0x17,0xf8,0x81,0xb8] 0x17,0xf8,0x81,0xb8 -# GFX940: s_getreg_b32 s1, hwreg(HW_REG_SQ_PERF_SNAPSHOT_PC_HI) ; encoding: [0x18,0xf8,0x81,0xb8] +# GFX942: s_getreg_b32 s1, hwreg(HW_REG_SQ_PERF_SNAPSHOT_PC_HI) ; encoding: [0x18,0xf8,0x81,0xb8] 0x18,0xf8,0x81,0xb8 -# GFX940: v_mov_b64_e32 v[2:3], v[4:5] ; encoding: [0x04,0x71,0x04,0x7e] +# GFX942: v_mov_b64_e32 v[2:3], v[4:5] ; encoding: [0x04,0x71,0x04,0x7e] 0x04,0x71,0x04,0x7e -# GFX940: v_mov_b64_dpp v[2:3], v[4:5] row_newbcast:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x70,0x04,0x7e,0x04,0x51,0x01,0xff] +# GFX942: v_mov_b64_dpp v[2:3], v[4:5] row_newbcast:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x70,0x04,0x7e,0x04,0x51,0x01,0xff] 0xfa,0x70,0x04,0x7e,0x04,0x51,0x01,0xff -# GFX940: v_mov_b64_e32 v[2:3], s[4:5] ; encoding: [0x04,0x70,0x04,0x7e] +# GFX942: v_mov_b64_e32 v[2:3], s[4:5] ; encoding: [0x04,0x70,0x04,0x7e] 0x04,0x70,0x04,0x7e -# GFX940: v_mov_b64_e32 v[2:3], 1 ; encoding: [0x81,0x70,0x04,0x7e] +# GFX942: v_mov_b64_e32 v[2:3], 1 ; encoding: [0x81,0x70,0x04,0x7e] 0x81,0x70,0x04,0x7e -# GFX940: v_mov_b64_e32 v[2:3], 0x64 ; encoding: [0xff,0x70,0x04,0x7e,0x64,0x00,0x00,0x00] +# GFX942: v_mov_b64_e32 v[2:3], 0x64 ; encoding: [0xff,0x70,0x04,0x7e,0x64,0x00,0x00,0x00] 0xff,0x70,0x04,0x7e,0x64,0x00,0x00,0x00 -# GFX940: v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9] ; encoding: [0x02,0x00,0x08,0xd2,0x04,0x0e,0x22,0x04] +# GFX942: v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9] ; encoding: [0x02,0x00,0x08,0xd2,0x04,0x0e,0x22,0x04] 0x02,0x00,0x08,0xd2,0x04,0x0e,0x22,0x04 -# GFX940: v_lshl_add_u64 v[2:3], v[4:5], 0, 1 ; encoding: [0x02,0x00,0x08,0xd2,0x04,0x01,0x05,0x02] +# GFX942: v_lshl_add_u64 v[2:3], v[4:5], 0, 1 ; encoding: [0x02,0x00,0x08,0xd2,0x04,0x01,0x05,0x02] 0x02,0x00,0x08,0xd2,0x04,0x01,0x05,0x02 -# GFX940: v_lshl_add_u64 v[2:3], v[4:5], 3, s[2:3] ; encoding: [0x02,0x00,0x08,0xd2,0x04,0x07,0x09,0x00] +# GFX942: v_lshl_add_u64 v[2:3], v[4:5], 3, s[2:3] ; encoding: [0x02,0x00,0x08,0xd2,0x04,0x07,0x09,0x00] 0x02,0x00,0x08,0xd2,0x04,0x07,0x09,0x00 -# GFX940: v_lshl_add_u64 v[2:3], s[4:5], 4, v[2:3] ; encoding: [0x02,0x00,0x08,0xd2,0x04,0x08,0x09,0x04] +# GFX942: v_lshl_add_u64 v[2:3], s[4:5], 4, v[2:3] ; encoding: [0x02,0x00,0x08,0xd2,0x04,0x08,0x09,0x04] 0x02,0x00,0x08,0xd2,0x04,0x08,0x09,0x04 -# GFX940: buffer_wbl2 sc1 ; encoding: [0x00,0x80,0xa0,0xe0,0x00,0x00,0x00,0x00] +# GFX942: buffer_wbl2 sc1 ; encoding: [0x00,0x80,0xa0,0xe0,0x00,0x00,0x00,0x00] 0x00,0x80,0xa0,0xe0,0x00,0x00,0x00,0x00 -# GFX940: buffer_wbl2 sc0 ; encoding: [0x00,0x40,0xa0,0xe0,0x00,0x00,0x00,0x00] +# GFX942: buffer_wbl2 sc0 ; encoding: [0x00,0x40,0xa0,0xe0,0x00,0x00,0x00,0x00] 0x00,0x40,0xa0,0xe0,0x00,0x00,0x00,0x00 -# GFX940: buffer_wbl2 sc0 sc1 ; encoding: [0x00,0xc0,0xa0,0xe0,0x00,0x00,0x00,0x00] +# GFX942: buffer_wbl2 sc0 sc1 ; encoding: [0x00,0xc0,0xa0,0xe0,0x00,0x00,0x00,0x00] 0x00,0xc0,0xa0,0xe0,0x00,0x00,0x00,0x00 -# GFX940: buffer_inv sc0 ; encoding: [0x00,0x40,0xa4,0xe0,0x00,0x00,0x00,0x00] +# GFX942: buffer_inv sc0 ; encoding: [0x00,0x40,0xa4,0xe0,0x00,0x00,0x00,0x00] 0x00,0x40,0xa4,0xe0,0x00,0x00,0x00,0x00 -# GFX940: buffer_inv sc1 ; encoding: [0x00,0x80,0xa4,0xe0,0x00,0x00,0x00,0x00] +# GFX942: buffer_inv sc1 ; encoding: [0x00,0x80,0xa4,0xe0,0x00,0x00,0x00,0x00] 0x00,0x80,0xa4,0xe0,0x00,0x00,0x00,0x00 -# GFX940: buffer_inv sc0 sc1 ; encoding: [0x00,0xc0,0xa4,0xe0,0x00,0x00,0x00,0x00] +# GFX942: buffer_inv sc0 sc1 ; encoding: [0x00,0xc0,0xa4,0xe0,0x00,0x00,0x00,0x00] 0x00,0xc0,0xa4,0xe0,0x00,0x00,0x00,0x00 -# GFX940: buffer_atomic_swap v5, off, s[8:11], s3 sc0 ; encoding: [0x00,0x40,0x00,0xe1,0x00,0x05,0x02,0x03] +# GFX942: buffer_atomic_swap v5, off, s[8:11], s3 sc0 ; encoding: [0x00,0x40,0x00,0xe1,0x00,0x05,0x02,0x03] 0x00,0x40,0x00,0xe1,0x00,0x05,0x02,0x03 -# GFX940: buffer_atomic_swap v5, off, s[8:11], s3 nt ; encoding: [0x00,0x00,0x02,0xe1,0x00,0x05,0x02,0x03] +# GFX942: buffer_atomic_swap v5, off, s[8:11], s3 nt ; encoding: [0x00,0x00,0x02,0xe1,0x00,0x05,0x02,0x03] 0x00,0x00,0x02,0xe1,0x00,0x05,0x02,0x03 -# GFX940: v_fmac_f64_e32 v[4:5], v[2:3], v[4:5] ; encoding: [0x02,0x09,0x08,0x08] +# GFX942: v_fmac_f64_e32 v[4:5], v[2:3], v[4:5] ; encoding: [0x02,0x09,0x08,0x08] 0x02,0x09,0x08,0x08 -# GFX940: v_fmac_f64_e32 v[254:255], v[2:3], v[4:5] ; encoding: [0x02,0x09,0xfc,0x09] +# GFX942: v_fmac_f64_e32 v[254:255], v[2:3], v[4:5] ; encoding: [0x02,0x09,0xfc,0x09] 0x02,0x09,0xfc,0x09 -# GFX940: v_fmac_f64_e32 v[4:5], v[254:255], v[4:5] ; encoding: [0xfe,0x09,0x08,0x08] +# GFX942: v_fmac_f64_e32 v[4:5], v[254:255], v[4:5] ; encoding: [0xfe,0x09,0x08,0x08] 0xfe,0x09,0x08,0x08 -# GFX940: v_fmac_f64_e32 v[4:5], flat_scratch, v[4:5] ; encoding: [0x66,0x08,0x08,0x08] +# GFX942: v_fmac_f64_e32 v[4:5], flat_scratch, v[4:5] ; encoding: [0x66,0x08,0x08,0x08] 0x66,0x08,0x08,0x08 -# GFX940: v_fmac_f64_e32 v[4:5], vcc, v[4:5] ; encoding: [0x6a,0x08,0x08,0x08] +# GFX942: v_fmac_f64_e32 v[4:5], vcc, v[4:5] ; encoding: [0x6a,0x08,0x08,0x08] 0x6a,0x08,0x08,0x08 -# GFX940: v_fmac_f64_e32 v[4:5], exec, v[4:5] ; encoding: [0x7e,0x08,0x08,0x08] +# GFX942: v_fmac_f64_e32 v[4:5], exec, v[4:5] ; encoding: [0x7e,0x08,0x08,0x08] 0x7e,0x08,0x08,0x08 -# GFX940: v_fmac_f64_e32 v[4:5], 0, v[4:5] ; encoding: [0x80,0x08,0x08,0x08] +# GFX942: v_fmac_f64_e32 v[4:5], 0, v[4:5] ; encoding: [0x80,0x08,0x08,0x08] 0x80,0x08,0x08,0x08 -# GFX940: v_fmac_f64_e32 v[4:5], -1, v[4:5] ; encoding: [0xc1,0x08,0x08,0x08] +# GFX942: v_fmac_f64_e32 v[4:5], -1, v[4:5] ; encoding: [0xc1,0x08,0x08,0x08] 0xc1,0x08,0x08,0x08 -# GFX940: v_fmac_f64_e32 v[4:5], 0.5, v[4:5] ; encoding: [0xf0,0x08,0x08,0x08] +# GFX942: v_fmac_f64_e32 v[4:5], 0.5, v[4:5] ; encoding: [0xf0,0x08,0x08,0x08] 0xf0,0x08,0x08,0x08 -# GFX940: v_fmac_f64_e32 v[4:5], -4.0, v[4:5] ; encoding: [0xf7,0x08,0x08,0x08] +# GFX942: v_fmac_f64_e32 v[4:5], -4.0, v[4:5] ; encoding: [0xf7,0x08,0x08,0x08] 0xf7,0x08,0x08,0x08 -# GFX940: v_fmac_f64_e32 v[4:5], 0xaf123456, v[4:5] ; encoding: [0xff,0x08,0x08,0x08,0x56,0x34,0x12,0xaf] +# GFX942: v_fmac_f64_e32 v[4:5], 0xaf123456, v[4:5] ; encoding: [0xff,0x08,0x08,0x08,0x56,0x34,0x12,0xaf] 0xff,0x08,0x08,0x08,0x56,0x34,0x12,0xaf -# GFX940: v_fmac_f64_e32 v[4:5], 0x3f717273, v[4:5] ; encoding: [0xff,0x08,0x08,0x08,0x73,0x72,0x71,0x3f] +# GFX942: v_fmac_f64_e32 v[4:5], 0x3f717273, v[4:5] ; encoding: [0xff,0x08,0x08,0x08,0x73,0x72,0x71,0x3f] 0xff,0x08,0x08,0x08,0x73,0x72,0x71,0x3f -# GFX940: v_fmac_f64_e32 v[4:5], v[2:3], v[254:255] ; encoding: [0x02,0xfd,0x09,0x08] +# GFX942: v_fmac_f64_e32 v[4:5], v[2:3], v[254:255] ; encoding: [0x02,0xfd,0x09,0x08] 0x02,0xfd,0x09,0x08 -# GFX940: v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0x02,0x11,0x02,0x00] +# GFX942: v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0x02,0x11,0x02,0x00] 0x04,0x00,0x04,0xd1,0x02,0x11,0x02,0x00 -# GFX940: v_fmac_f64_e64 v[254:255], v[2:3], v[8:9] ; encoding: [0xfe,0x00,0x04,0xd1,0x02,0x11,0x02,0x00] +# GFX942: v_fmac_f64_e64 v[254:255], v[2:3], v[8:9] ; encoding: [0xfe,0x00,0x04,0xd1,0x02,0x11,0x02,0x00] 0xfe,0x00,0x04,0xd1,0x02,0x11,0x02,0x00 -# GFX940: v_fmac_f64_e64 v[4:5], v[254:255], v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0xfe,0x11,0x02,0x00] +# GFX942: v_fmac_f64_e64 v[4:5], v[254:255], v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0xfe,0x11,0x02,0x00] 0x04,0x00,0x04,0xd1,0xfe,0x11,0x02,0x00 -# GFX940: v_fmac_f64_e64 v[4:5], flat_scratch, v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0x66,0x10,0x02,0x00] +# GFX942: v_fmac_f64_e64 v[4:5], flat_scratch, v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0x66,0x10,0x02,0x00] 0x04,0x00,0x04,0xd1,0x66,0x10,0x02,0x00 -# GFX940: v_fmac_f64_e64 v[4:5], vcc, v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0x6a,0x10,0x02,0x00] +# GFX942: v_fmac_f64_e64 v[4:5], vcc, v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0x6a,0x10,0x02,0x00] 0x04,0x00,0x04,0xd1,0x6a,0x10,0x02,0x00 -# GFX940: v_fmac_f64_e64 v[4:5], exec, v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0x7e,0x10,0x02,0x00] +# GFX942: v_fmac_f64_e64 v[4:5], exec, v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0x7e,0x10,0x02,0x00] 0x04,0x00,0x04,0xd1,0x7e,0x10,0x02,0x00 -# GFX940: v_fmac_f64_e64 v[4:5], 0, v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0x80,0x10,0x02,0x00] +# GFX942: v_fmac_f64_e64 v[4:5], 0, v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0x80,0x10,0x02,0x00] 0x04,0x00,0x04,0xd1,0x80,0x10,0x02,0x00 -# GFX940: v_fmac_f64_e64 v[4:5], -1, v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0xc1,0x10,0x02,0x00] +# GFX942: v_fmac_f64_e64 v[4:5], -1, v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0xc1,0x10,0x02,0x00] 0x04,0x00,0x04,0xd1,0xc1,0x10,0x02,0x00 -# GFX940: v_fmac_f64_e64 v[4:5], 0.5, v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0xf0,0x10,0x02,0x00] +# GFX942: v_fmac_f64_e64 v[4:5], 0.5, v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0xf0,0x10,0x02,0x00] 0x04,0x00,0x04,0xd1,0xf0,0x10,0x02,0x00 -# GFX940: v_fmac_f64_e64 v[4:5], -4.0, v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0xf7,0x10,0x02,0x00] +# GFX942: v_fmac_f64_e64 v[4:5], -4.0, v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0xf7,0x10,0x02,0x00] 0x04,0x00,0x04,0xd1,0xf7,0x10,0x02,0x00 -# GFX940: v_fmac_f64_e64 v[4:5], v[2:3], v[254:255] ; encoding: [0x04,0x00,0x04,0xd1,0x02,0xfd,0x03,0x00] +# GFX942: v_fmac_f64_e64 v[4:5], v[2:3], v[254:255] ; encoding: [0x04,0x00,0x04,0xd1,0x02,0xfd,0x03,0x00] 0x04,0x00,0x04,0xd1,0x02,0xfd,0x03,0x00 -# GFX940: v_fmac_f64_e64 v[4:5], v[2:3], flat_scratch ; encoding: [0x04,0x00,0x04,0xd1,0x02,0xcd,0x00,0x00] +# GFX942: v_fmac_f64_e64 v[4:5], v[2:3], flat_scratch ; encoding: [0x04,0x00,0x04,0xd1,0x02,0xcd,0x00,0x00] 0x04,0x00,0x04,0xd1,0x02,0xcd,0x00,0x00 -# GFX940: v_fmac_f64_e64 v[4:5], v[2:3], vcc ; encoding: [0x04,0x00,0x04,0xd1,0x02,0xd5,0x00,0x00] +# GFX942: v_fmac_f64_e64 v[4:5], v[2:3], vcc ; encoding: [0x04,0x00,0x04,0xd1,0x02,0xd5,0x00,0x00] 0x04,0x00,0x04,0xd1,0x02,0xd5,0x00,0x00 -# GFX940: v_fmac_f64_e64 v[4:5], v[2:3], exec ; encoding: [0x04,0x00,0x04,0xd1,0x02,0xfd,0x00,0x00] +# GFX942: v_fmac_f64_e64 v[4:5], v[2:3], exec ; encoding: [0x04,0x00,0x04,0xd1,0x02,0xfd,0x00,0x00] 0x04,0x00,0x04,0xd1,0x02,0xfd,0x00,0x00 -# GFX940: v_fmac_f64_e64 v[4:5], v[2:3], 0 ; encoding: [0x04,0x00,0x04,0xd1,0x02,0x01,0x01,0x00] +# GFX942: v_fmac_f64_e64 v[4:5], v[2:3], 0 ; encoding: [0x04,0x00,0x04,0xd1,0x02,0x01,0x01,0x00] 0x04,0x00,0x04,0xd1,0x02,0x01,0x01,0x00 -# GFX940: v_fmac_f64_e64 v[4:5], v[2:3], -1 ; encoding: [0x04,0x00,0x04,0xd1,0x02,0x83,0x01,0x00] +# GFX942: v_fmac_f64_e64 v[4:5], v[2:3], -1 ; encoding: [0x04,0x00,0x04,0xd1,0x02,0x83,0x01,0x00] 0x04,0x00,0x04,0xd1,0x02,0x83,0x01,0x00 -# GFX940: v_fmac_f64_e64 v[4:5], v[2:3], 0.5 ; encoding: [0x04,0x00,0x04,0xd1,0x02,0xe1,0x01,0x00] +# GFX942: v_fmac_f64_e64 v[4:5], v[2:3], 0.5 ; encoding: [0x04,0x00,0x04,0xd1,0x02,0xe1,0x01,0x00] 0x04,0x00,0x04,0xd1,0x02,0xe1,0x01,0x00 -# GFX940: v_fmac_f64_e64 v[4:5], v[2:3], -4.0 ; encoding: [0x04,0x00,0x04,0xd1,0x02,0xef,0x01,0x00] +# GFX942: v_fmac_f64_e64 v[4:5], v[2:3], -4.0 ; encoding: [0x04,0x00,0x04,0xd1,0x02,0xef,0x01,0x00] 0x04,0x00,0x04,0xd1,0x02,0xef,0x01,0x00 -# GFX940: v_fmac_f64_e64 v[4:5], -v[2:3], v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0x02,0x11,0x02,0x20] +# GFX942: v_fmac_f64_e64 v[4:5], -v[2:3], v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0x02,0x11,0x02,0x20] 0x04,0x00,0x04,0xd1,0x02,0x11,0x02,0x20 -# GFX940: v_fmac_f64_e64 v[4:5], v[2:3], -v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0x02,0x11,0x02,0x40] +# GFX942: v_fmac_f64_e64 v[4:5], v[2:3], -v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0x02,0x11,0x02,0x40] 0x04,0x00,0x04,0xd1,0x02,0x11,0x02,0x40 -# GFX940: v_fmac_f64_e64 v[4:5], -v[2:3], -v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0x02,0x11,0x02,0x60] +# GFX942: v_fmac_f64_e64 v[4:5], -v[2:3], -v[8:9] ; encoding: [0x04,0x00,0x04,0xd1,0x02,0x11,0x02,0x60] 0x04,0x00,0x04,0xd1,0x02,0x11,0x02,0x60 -# GFX940: v_fmac_f64_e64 v[4:5], |v[2:3]|, v[8:9] ; encoding: [0x04,0x01,0x04,0xd1,0x02,0x11,0x02,0x00] +# GFX942: v_fmac_f64_e64 v[4:5], |v[2:3]|, v[8:9] ; encoding: [0x04,0x01,0x04,0xd1,0x02,0x11,0x02,0x00] 0x04,0x01,0x04,0xd1,0x02,0x11,0x02,0x00 -# GFX940: v_fmac_f64_e64 v[4:5], v[2:3], |v[8:9]| ; encoding: [0x04,0x02,0x04,0xd1,0x02,0x11,0x02,0x00] +# GFX942: v_fmac_f64_e64 v[4:5], v[2:3], |v[8:9]| ; encoding: [0x04,0x02,0x04,0xd1,0x02,0x11,0x02,0x00] 0x04,0x02,0x04,0xd1,0x02,0x11,0x02,0x00 -# GFX940: v_fmac_f64_e64 v[4:5], |v[2:3]|, |v[8:9]| ; encoding: [0x04,0x03,0x04,0xd1,0x02,0x11,0x02,0x00] +# GFX942: v_fmac_f64_e64 v[4:5], |v[2:3]|, |v[8:9]| ; encoding: [0x04,0x03,0x04,0xd1,0x02,0x11,0x02,0x00] 0x04,0x03,0x04,0xd1,0x02,0x11,0x02,0x00 -# GFX940: v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] clamp ; encoding: [0x04,0x80,0x04,0xd1,0x02,0x11,0x02,0x00] +# GFX942: v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] clamp ; encoding: [0x04,0x80,0x04,0xd1,0x02,0x11,0x02,0x00] 0x04,0x80,0x04,0xd1,0x02,0x11,0x02,0x00 -# GFX940: v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] mul:2 ; encoding: [0x04,0x00,0x04,0xd1,0x02,0x11,0x02,0x08] +# GFX942: v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] mul:2 ; encoding: [0x04,0x00,0x04,0xd1,0x02,0x11,0x02,0x08] 0x04,0x00,0x04,0xd1,0x02,0x11,0x02,0x08 -# GFX940: v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] mul:4 ; encoding: [0x04,0x00,0x04,0xd1,0x02,0x11,0x02,0x10] +# GFX942: v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] mul:4 ; encoding: [0x04,0x00,0x04,0xd1,0x02,0x11,0x02,0x10] 0x04,0x00,0x04,0xd1,0x02,0x11,0x02,0x10 -# GFX940: v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] div:2 ; encoding: [0x04,0x00,0x04,0xd1,0x02,0x11,0x02,0x18] +# GFX942: v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] div:2 ; encoding: [0x04,0x00,0x04,0xd1,0x02,0x11,0x02,0x18] 0x04,0x00,0x04,0xd1,0x02,0x11,0x02,0x18 -# GFX940: v_fmamk_f32 v0, v2, 0x42c80000, v3 ; encoding: [0x02,0x07,0x00,0x2e,0x00,0x00,0xc8,0x42] +# GFX942: v_fmamk_f32 v0, v2, 0x42c80000, v3 ; encoding: [0x02,0x07,0x00,0x2e,0x00,0x00,0xc8,0x42] 0x02,0x07,0x00,0x2e,0x00,0x00,0xc8,0x42 -# GFX940: v_fmaak_f32 v0, v2, v3, 0x42c80000 ; encoding: [0x02,0x07,0x00,0x30,0x00,0x00,0xc8,0x42] +# GFX942: v_fmaak_f32 v0, v2, v3, 0x42c80000 ; encoding: [0x02,0x07,0x00,0x30,0x00,0x00,0xc8,0x42] 0x02,0x07,0x00,0x30,0x00,0x00,0xc8,0x42 -# GFX940: global_atomic_add_f32 v0, v[0:1], v2, off sc0 sc1 ; encoding: [0x00,0x80,0x35,0xdf,0x00,0x02,0x7f,0x00] +# GFX942: global_atomic_add_f32 v0, v[0:1], v2, off sc0 sc1 ; encoding: [0x00,0x80,0x35,0xdf,0x00,0x02,0x7f,0x00] 0x00,0x80,0x35,0xdf,0x00,0x02,0x7f,0x00 -# GFX940: global_atomic_add_f32 v[0:1], v2, off sc1 ; encoding: [0x00,0x80,0x34,0xdf,0x00,0x02,0x7f,0x00] +# GFX942: global_atomic_add_f32 v[0:1], v2, off sc1 ; encoding: [0x00,0x80,0x34,0xdf,0x00,0x02,0x7f,0x00] 0x00,0x80,0x34,0xdf,0x00,0x02,0x7f,0x00 -# GFX940: global_atomic_add_f32 v0, v2, s[0:1] sc1 ; encoding: [0x00,0x80,0x34,0xdf,0x00,0x02,0x00,0x00] +# GFX942: global_atomic_add_f32 v0, v2, s[0:1] sc1 ; encoding: [0x00,0x80,0x34,0xdf,0x00,0x02,0x00,0x00] 0x00,0x80,0x34,0xdf,0x00,0x02,0x00,0x00 -# GFX940: global_atomic_add_f32 v1, v0, v2, s[0:1] sc0 sc1 ; encoding: [0x00,0x80,0x35,0xdf,0x00,0x02,0x00,0x01] +# GFX942: global_atomic_add_f32 v1, v0, v2, s[0:1] sc0 sc1 ; encoding: [0x00,0x80,0x35,0xdf,0x00,0x02,0x00,0x01] 0x00,0x80,0x35,0xdf,0x00,0x02,0x00,0x01 -# GFX940: global_atomic_pk_add_f16 v0, v[0:1], v2, off sc0 sc1 ; encoding: [0x00,0x80,0x39,0xdf,0x00,0x02,0x7f,0x00] +# GFX942: global_atomic_pk_add_f16 v0, v[0:1], v2, off sc0 sc1 ; encoding: [0x00,0x80,0x39,0xdf,0x00,0x02,0x7f,0x00] 0x00,0x80,0x39,0xdf,0x00,0x02,0x7f,0x00 -# GFX940: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 sc1 ; encoding: [0x00,0x00,0x3d,0xdf,0x00,0x02,0x00,0x00] +# GFX942: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 sc1 ; encoding: [0x00,0x00,0x3d,0xdf,0x00,0x02,0x00,0x00] 0x00,0x00,0x3d,0xdf,0x00,0x02,0x00,0x00 -# GFX940: flat_atomic_add_f64 v[0:1], v[2:3] sc1 ; encoding: [0x00,0x00,0x3c,0xdf,0x00,0x02,0x00,0x00] +# GFX942: flat_atomic_add_f64 v[0:1], v[2:3] sc1 ; encoding: [0x00,0x00,0x3c,0xdf,0x00,0x02,0x00,0x00] 0x00,0x00,0x3c,0xdf,0x00,0x02,0x00,0x00 -# GFX940: flat_atomic_min_f64 v[0:1], v[2:3] sc1 ; encoding: [0x00,0x00,0x40,0xdf,0x00,0x02,0x00,0x00] +# GFX942: flat_atomic_min_f64 v[0:1], v[2:3] sc1 ; encoding: [0x00,0x00,0x40,0xdf,0x00,0x02,0x00,0x00] 0x00,0x00,0x40,0xdf,0x00,0x02,0x00,0x00 -# GFX940: flat_atomic_max_f64 v[0:1], v[2:3] sc1 ; encoding: [0x00,0x00,0x44,0xdf,0x00,0x02,0x00,0x00] +# GFX942: flat_atomic_max_f64 v[0:1], v[2:3] sc1 ; encoding: [0x00,0x00,0x44,0xdf,0x00,0x02,0x00,0x00] 0x00,0x00,0x44,0xdf,0x00,0x02,0x00,0x00 -# GFX940: global_atomic_add_f64 v[0:1], v[2:3], off sc1 ; encoding: [0x00,0x80,0x3c,0xdf,0x00,0x02,0x7f,0x00] +# GFX942: global_atomic_add_f64 v[0:1], v[2:3], off sc1 ; encoding: [0x00,0x80,0x3c,0xdf,0x00,0x02,0x7f,0x00] 0x00,0x80,0x3c,0xdf,0x00,0x02,0x7f,0x00 -# GFX940: global_atomic_min_f64 v[0:1], v[2:3], off sc1 ; encoding: [0x00,0x80,0x40,0xdf,0x00,0x02,0x7f,0x00] +# GFX942: global_atomic_min_f64 v[0:1], v[2:3], off sc1 ; encoding: [0x00,0x80,0x40,0xdf,0x00,0x02,0x7f,0x00] 0x00,0x80,0x40,0xdf,0x00,0x02,0x7f,0x00 -# GFX940: global_atomic_max_f64 v[0:1], v[2:3], off sc1 ; encoding: [0x00,0x80,0x44,0xdf,0x00,0x02,0x7f,0x00] +# GFX942: global_atomic_max_f64 v[0:1], v[2:3], off sc1 ; encoding: [0x00,0x80,0x44,0xdf,0x00,0x02,0x7f,0x00] 0x00,0x80,0x44,0xdf,0x00,0x02,0x7f,0x00 -# GFX940: buffer_atomic_add_f32 v4, off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x34,0xe1,0x00,0x04,0x02,0x03] +# GFX942: buffer_atomic_add_f32 v4, off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x34,0xe1,0x00,0x04,0x02,0x03] 0x00,0x80,0x34,0xe1,0x00,0x04,0x02,0x03 -# GFX940: buffer_atomic_pk_add_f16 v4, off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x38,0xe1,0x00,0x04,0x02,0x03] +# GFX942: buffer_atomic_pk_add_f16 v4, off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x38,0xe1,0x00,0x04,0x02,0x03] 0x00,0x80,0x38,0xe1,0x00,0x04,0x02,0x03 -# GFX940: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x3c,0xe1,0x00,0x04,0x02,0x03] +# GFX942: buffer_atomic_add_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x3c,0xe1,0x00,0x04,0x02,0x03] 0x00,0x80,0x3c,0xe1,0x00,0x04,0x02,0x03 -# GFX940: buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x44,0xe1,0x00,0x04,0x02,0x03] +# GFX942: buffer_atomic_max_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x44,0xe1,0x00,0x04,0x02,0x03] 0x00,0x80,0x44,0xe1,0x00,0x04,0x02,0x03 -# GFX940: buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x40,0xe1,0x00,0x04,0x02,0x03] +# GFX942: buffer_atomic_min_f64 v[4:5], off, s[8:11], s3 sc1 ; encoding: [0x00,0x80,0x40,0xe1,0x00,0x04,0x02,0x03] 0x00,0x80,0x40,0xe1,0x00,0x04,0x02,0x03 -# GFX940: v_cvt_f32_bf8_e32 v1, s3 ; encoding: [0x03,0xaa,0x02,0x7e] +# GFX942: v_cvt_f32_bf8_e32 v1, s3 ; encoding: [0x03,0xaa,0x02,0x7e] 0x03,0xaa,0x02,0x7e -# GFX940: v_cvt_f32_bf8_e32 v1, 3 ; encoding: [0x83,0xaa,0x02,0x7e] +# GFX942: v_cvt_f32_bf8_e32 v1, 3 ; encoding: [0x83,0xaa,0x02,0x7e] 0x83,0xaa,0x02,0x7e -# GFX940: v_cvt_f32_bf8_e32 v1, v3 ; encoding: [0x03,0xab,0x02,0x7e] +# GFX942: v_cvt_f32_bf8_e32 v1, v3 ; encoding: [0x03,0xab,0x02,0x7e] 0x03,0xab,0x02,0x7e -# GFX940: v_cvt_f32_bf8_sdwa v1, s3 src0_sel:BYTE_1 ; encoding: [0xf9,0xaa,0x02,0x7e,0x03,0x06,0x81,0x00] +# GFX942: v_cvt_f32_bf8_sdwa v1, s3 src0_sel:BYTE_1 ; encoding: [0xf9,0xaa,0x02,0x7e,0x03,0x06,0x81,0x00] 0xf9,0xaa,0x02,0x7e,0x03,0x06,0x81,0x00 -# GFX940: v_cvt_f32_bf8_dpp v1, v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xaa,0x02,0x7e,0x03,0x58,0x00,0xff] +# GFX942: v_cvt_f32_bf8_dpp v1, v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xaa,0x02,0x7e,0x03,0x58,0x00,0xff] 0xfa,0xaa,0x02,0x7e,0x03,0x58,0x00,0xff -# GFX940: v_cvt_f32_bf8_e64 v1, s3 mul:2 ; encoding: [0x01,0x00,0x95,0xd1,0x03,0x00,0x00,0x08] +# GFX942: v_cvt_f32_bf8_e64 v1, s3 mul:2 ; encoding: [0x01,0x00,0x95,0xd1,0x03,0x00,0x00,0x08] 0x01,0x00,0x95,0xd1,0x03,0x00,0x00,0x08 -# GFX940: v_cvt_f32_bf8_sdwa v1, s3 clamp mul:2 src0_sel:BYTE_1 ; encoding: [0xf9,0xaa,0x02,0x7e,0x03,0x66,0x81,0x00] +# GFX942: v_cvt_f32_bf8_sdwa v1, s3 clamp mul:2 src0_sel:BYTE_1 ; encoding: [0xf9,0xaa,0x02,0x7e,0x03,0x66,0x81,0x00] 0xf9,0xaa,0x02,0x7e,0x03,0x66,0x81,0x00 -# GFX940: v_cvt_f32_bf8_e64 v1, s3 clamp ; encoding: [0x01,0x80,0x95,0xd1,0x03,0x00,0x00,0x00] +# GFX942: v_cvt_f32_bf8_e64 v1, s3 clamp ; encoding: [0x01,0x80,0x95,0xd1,0x03,0x00,0x00,0x00] 0x01,0x80,0x95,0xd1,0x03,0x00,0x00,0x00 -# GFX940: v_cvt_f32_fp8_e32 v1, s3 ; encoding: [0x03,0xa8,0x02,0x7e] +# GFX942: v_cvt_f32_fp8_e32 v1, s3 ; encoding: [0x03,0xa8,0x02,0x7e] 0x03,0xa8,0x02,0x7e -# GFX940: v_cvt_f32_fp8_e32 v1, 3 ; encoding: [0x83,0xa8,0x02,0x7e] +# GFX942: v_cvt_f32_fp8_e32 v1, 3 ; encoding: [0x83,0xa8,0x02,0x7e] 0x83,0xa8,0x02,0x7e -# GFX940: v_cvt_f32_fp8_e32 v1, v3 ; encoding: [0x03,0xa9,0x02,0x7e] +# GFX942: v_cvt_f32_fp8_e32 v1, v3 ; encoding: [0x03,0xa9,0x02,0x7e] 0x03,0xa9,0x02,0x7e -# GFX940: v_cvt_f32_fp8_sdwa v1, s3 src0_sel:BYTE_1 ; encoding: [0xf9,0xa8,0x02,0x7e,0x03,0x06,0x81,0x00] +# GFX942: v_cvt_f32_fp8_sdwa v1, s3 src0_sel:BYTE_1 ; encoding: [0xf9,0xa8,0x02,0x7e,0x03,0x06,0x81,0x00] 0xf9,0xa8,0x02,0x7e,0x03,0x06,0x81,0x00 -# GFX940: v_cvt_f32_fp8_dpp v1, v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xa8,0x02,0x7e,0x03,0x58,0x00,0xff] +# GFX942: v_cvt_f32_fp8_dpp v1, v3 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xa8,0x02,0x7e,0x03,0x58,0x00,0xff] 0xfa,0xa8,0x02,0x7e,0x03,0x58,0x00,0xff -# GFX940: v_cvt_f32_fp8_e64 v1, s3 mul:2 ; encoding: [0x01,0x00,0x94,0xd1,0x03,0x00,0x00,0x08] +# GFX942: v_cvt_f32_fp8_e64 v1, s3 mul:2 ; encoding: [0x01,0x00,0x94,0xd1,0x03,0x00,0x00,0x08] 0x01,0x00,0x94,0xd1,0x03,0x00,0x00,0x08 -# GFX940: v_cvt_f32_fp8_sdwa v1, s3 clamp mul:2 src0_sel:BYTE_1 ; encoding: [0xf9,0xa8,0x02,0x7e,0x03,0x66,0x81,0x00] +# GFX942: v_cvt_f32_fp8_sdwa v1, s3 clamp mul:2 src0_sel:BYTE_1 ; encoding: [0xf9,0xa8,0x02,0x7e,0x03,0x66,0x81,0x00] 0xf9,0xa8,0x02,0x7e,0x03,0x66,0x81,0x00 -# GFX940: v_cvt_f32_fp8_e64 v1, s3 clamp ; encoding: [0x01,0x80,0x94,0xd1,0x03,0x00,0x00,0x00] +# GFX942: v_cvt_f32_fp8_e64 v1, s3 clamp ; encoding: [0x01,0x80,0x94,0xd1,0x03,0x00,0x00,0x00] 0x01,0x80,0x94,0xd1,0x03,0x00,0x00,0x00 -# GFX940: v_cvt_f32_fp8_sdwa v1, 3 src0_sel:BYTE_1 ; encoding: [0xf9,0xa8,0x02,0x7e,0x83,0x06,0x81,0x00] +# GFX942: v_cvt_f32_fp8_sdwa v1, 3 src0_sel:BYTE_1 ; encoding: [0xf9,0xa8,0x02,0x7e,0x83,0x06,0x81,0x00] 0xf9,0xa8,0x02,0x7e,0x83,0x06,0x81,0x00 -# GFX940: v_cvt_pk_f32_bf8_e32 v[2:3], s3 ; encoding: [0x03,0xae,0x04,0x7e] +# GFX942: v_cvt_pk_f32_bf8_e32 v[2:3], s3 ; encoding: [0x03,0xae,0x04,0x7e] 0x03,0xae,0x04,0x7e -# GFX940: v_cvt_pk_f32_bf8_e32 v[2:3], 3 ; encoding: [0x83,0xae,0x04,0x7e] +# GFX942: v_cvt_pk_f32_bf8_e32 v[2:3], 3 ; encoding: [0x83,0xae,0x04,0x7e] 0x83,0xae,0x04,0x7e -# GFX940: v_cvt_pk_f32_bf8_e32 v[2:3], v3 ; encoding: [0x03,0xaf,0x04,0x7e] +# GFX942: v_cvt_pk_f32_bf8_e32 v[2:3], v3 ; encoding: [0x03,0xaf,0x04,0x7e] 0x03,0xaf,0x04,0x7e -# GFX940: v_cvt_pk_f32_bf8_sdwa v[2:3], s3 src0_sel:WORD_1 ; encoding: [0xf9,0xae,0x04,0x7e,0x03,0x06,0x85,0x00] +# GFX942: v_cvt_pk_f32_bf8_sdwa v[2:3], s3 src0_sel:WORD_1 ; encoding: [0xf9,0xae,0x04,0x7e,0x03,0x06,0x85,0x00] 0xf9,0xae,0x04,0x7e,0x03,0x06,0x85,0x00 -# GFX940: v_cvt_pk_f32_bf8_dpp v[0:1], v3 row_newbcast:3 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xae,0x00,0x7e,0x03,0x53,0x01,0xff] +# GFX942: v_cvt_pk_f32_bf8_dpp v[0:1], v3 row_newbcast:3 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xae,0x00,0x7e,0x03,0x53,0x01,0xff] 0xfa,0xae,0x00,0x7e,0x03,0x53,0x01,0xff -# GFX940: v_cvt_pk_f32_bf8_e64 v[2:3], s3 mul:2 ; encoding: [0x02,0x00,0x97,0xd1,0x03,0x00,0x00,0x08] +# GFX942: v_cvt_pk_f32_bf8_e64 v[2:3], s3 mul:2 ; encoding: [0x02,0x00,0x97,0xd1,0x03,0x00,0x00,0x08] 0x02,0x00,0x97,0xd1,0x03,0x00,0x00,0x08 -# GFX940: v_cvt_pk_f32_bf8_sdwa v[2:3], s3 clamp mul:2 src0_sel:WORD_1 ; encoding: [0xf9,0xae,0x04,0x7e,0x03,0x66,0x85,0x00] +# GFX942: v_cvt_pk_f32_bf8_sdwa v[2:3], s3 clamp mul:2 src0_sel:WORD_1 ; encoding: [0xf9,0xae,0x04,0x7e,0x03,0x66,0x85,0x00] 0xf9,0xae,0x04,0x7e,0x03,0x66,0x85,0x00 -# GFX940: v_cvt_pk_f32_bf8_e64 v[2:3], s3 clamp ; encoding: [0x02,0x80,0x97,0xd1,0x03,0x00,0x00,0x00] +# GFX942: v_cvt_pk_f32_bf8_e64 v[2:3], s3 clamp ; encoding: [0x02,0x80,0x97,0xd1,0x03,0x00,0x00,0x00] 0x02,0x80,0x97,0xd1,0x03,0x00,0x00,0x00 -# GFX940: v_cvt_pk_f32_fp8_e32 v[2:3], s3 ; encoding: [0x03,0xac,0x04,0x7e] +# GFX942: v_cvt_pk_f32_fp8_e32 v[2:3], s3 ; encoding: [0x03,0xac,0x04,0x7e] 0x03,0xac,0x04,0x7e -# GFX940: v_cvt_pk_f32_fp8_e32 v[2:3], 3 ; encoding: [0x83,0xac,0x04,0x7e] +# GFX942: v_cvt_pk_f32_fp8_e32 v[2:3], 3 ; encoding: [0x83,0xac,0x04,0x7e] 0x83,0xac,0x04,0x7e -# GFX940: v_cvt_pk_f32_fp8_e32 v[2:3], v3 ; encoding: [0x03,0xad,0x04,0x7e] +# GFX942: v_cvt_pk_f32_fp8_e32 v[2:3], v3 ; encoding: [0x03,0xad,0x04,0x7e] 0x03,0xad,0x04,0x7e -# GFX940: v_cvt_pk_f32_fp8_sdwa v[2:3], s3 src0_sel:WORD_1 ; encoding: [0xf9,0xac,0x04,0x7e,0x03,0x06,0x85,0x00] +# GFX942: v_cvt_pk_f32_fp8_sdwa v[2:3], s3 src0_sel:WORD_1 ; encoding: [0xf9,0xac,0x04,0x7e,0x03,0x06,0x85,0x00] 0xf9,0xac,0x04,0x7e,0x03,0x06,0x85,0x00 -# GFX940: v_cvt_pk_f32_fp8_sdwa v[2:3], 3 src0_sel:WORD_1 ; encoding: [0xf9,0xac,0x04,0x7e,0x83,0x06,0x85,0x00] +# GFX942: v_cvt_pk_f32_fp8_sdwa v[2:3], 3 src0_sel:WORD_1 ; encoding: [0xf9,0xac,0x04,0x7e,0x83,0x06,0x85,0x00] 0xf9,0xac,0x04,0x7e,0x83,0x06,0x85,0x00 -# GFX940: v_cvt_pk_f32_fp8_dpp v[0:1], v3 row_newbcast:3 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xac,0x00,0x7e,0x03,0x53,0x01,0xff] +# GFX942: v_cvt_pk_f32_fp8_dpp v[0:1], v3 row_newbcast:3 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xac,0x00,0x7e,0x03,0x53,0x01,0xff] 0xfa,0xac,0x00,0x7e,0x03,0x53,0x01,0xff -# GFX940: v_cvt_pk_f32_fp8_e64 v[2:3], s3 mul:2 ; encoding: [0x02,0x00,0x96,0xd1,0x03,0x00,0x00,0x08] +# GFX942: v_cvt_pk_f32_fp8_e64 v[2:3], s3 mul:2 ; encoding: [0x02,0x00,0x96,0xd1,0x03,0x00,0x00,0x08] 0x02,0x00,0x96,0xd1,0x03,0x00,0x00,0x08 -# GFX940: v_cvt_pk_f32_fp8_sdwa v[2:3], s3 clamp mul:2 src0_sel:WORD_1 ; encoding: [0xf9,0xac,0x04,0x7e,0x03,0x66,0x85,0x00] +# GFX942: v_cvt_pk_f32_fp8_sdwa v[2:3], s3 clamp mul:2 src0_sel:WORD_1 ; encoding: [0xf9,0xac,0x04,0x7e,0x03,0x66,0x85,0x00] 0xf9,0xac,0x04,0x7e,0x03,0x66,0x85,0x00 -# GFX940: v_cvt_pk_f32_fp8_e64 v[2:3], s3 clamp ; encoding: [0x02,0x80,0x96,0xd1,0x03,0x00,0x00,0x00] +# GFX942: v_cvt_pk_f32_fp8_e64 v[2:3], s3 clamp ; encoding: [0x02,0x80,0x96,0xd1,0x03,0x00,0x00,0x00] 0x02,0x80,0x96,0xd1,0x03,0x00,0x00,0x00 -# GFX940: v_cvt_pk_bf8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa3,0xd2,0x02,0x07,0x02,0x00] +# GFX942: v_cvt_pk_bf8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa3,0xd2,0x02,0x07,0x02,0x00] 0x01,0x00,0xa3,0xd2,0x02,0x07,0x02,0x00 -# GFX940: v_cvt_pk_bf8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0xa3,0xd2,0x02,0x07,0x02,0x20] +# GFX942: v_cvt_pk_bf8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0xa3,0xd2,0x02,0x07,0x02,0x20] 0x01,0x02,0xa3,0xd2,0x02,0x07,0x02,0x20 -# GFX940: v_cvt_pk_bf8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa3,0xd2,0x02,0x06,0x01,0x00] +# GFX942: v_cvt_pk_bf8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa3,0xd2,0x02,0x06,0x01,0x00] 0x01,0x00,0xa3,0xd2,0x02,0x06,0x01,0x00 -# GFX940: v_cvt_pk_bf8_f32 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0xa3,0xd2,0x02,0x07,0x02,0x00] +# GFX942: v_cvt_pk_bf8_f32 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0xa3,0xd2,0x02,0x07,0x02,0x00] 0x01,0x40,0xa3,0xd2,0x02,0x07,0x02,0x00 -# GFX940: v_cvt_pk_fp8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa2,0xd2,0x02,0x07,0x02,0x00] +# GFX942: v_cvt_pk_fp8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa2,0xd2,0x02,0x07,0x02,0x00] 0x01,0x00,0xa2,0xd2,0x02,0x07,0x02,0x00 -# GFX940: v_cvt_pk_fp8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0xa2,0xd2,0x02,0x07,0x02,0x20] +# GFX942: v_cvt_pk_fp8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0xa2,0xd2,0x02,0x07,0x02,0x20] 0x01,0x02,0xa2,0xd2,0x02,0x07,0x02,0x20 -# GFX940: v_cvt_pk_fp8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa2,0xd2,0x02,0x06,0x01,0x00] +# GFX942: v_cvt_pk_fp8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa2,0xd2,0x02,0x06,0x01,0x00] 0x01,0x00,0xa2,0xd2,0x02,0x06,0x01,0x00 -# GFX940: v_cvt_pk_fp8_f32 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0xa2,0xd2,0x02,0x07,0x02,0x00] +# GFX942: v_cvt_pk_fp8_f32 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0xa2,0xd2,0x02,0x07,0x02,0x00] 0x01,0x40,0xa2,0xd2,0x02,0x07,0x02,0x00 -# GFX940: v_cvt_sr_bf8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa5,0xd2,0x02,0x07,0x02,0x00] +# GFX942: v_cvt_sr_bf8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa5,0xd2,0x02,0x07,0x02,0x00] 0x01,0x00,0xa5,0xd2,0x02,0x07,0x02,0x00 -# GFX940: v_cvt_sr_bf8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa5,0xd2,0x02,0x06,0x01,0x00] +# GFX942: v_cvt_sr_bf8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa5,0xd2,0x02,0x06,0x01,0x00] 0x01,0x00,0xa5,0xd2,0x02,0x06,0x01,0x00 -# GFX940: v_cvt_sr_bf8_f32 v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0xa5,0xd2,0x02,0x07,0x02,0x00] +# GFX942: v_cvt_sr_bf8_f32 v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0xa5,0xd2,0x02,0x07,0x02,0x00] 0x01,0x60,0xa5,0xd2,0x02,0x07,0x02,0x00 -# GFX940: v_cvt_sr_bf8_f32 v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0xa5,0xd2,0x02,0x07,0x02,0x00] +# GFX942: v_cvt_sr_bf8_f32 v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0xa5,0xd2,0x02,0x07,0x02,0x00] 0x01,0x40,0xa5,0xd2,0x02,0x07,0x02,0x00 -# GFX940: v_cvt_sr_bf8_f32 v1, -|s2|, v3 ; encoding: [0x01,0x01,0xa5,0xd2,0x02,0x06,0x02,0x20] +# GFX942: v_cvt_sr_bf8_f32 v1, -|s2|, v3 ; encoding: [0x01,0x01,0xa5,0xd2,0x02,0x06,0x02,0x20] 0x01,0x01,0xa5,0xd2,0x02,0x06,0x02,0x20 -# GFX940: v_cvt_sr_fp8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa4,0xd2,0x02,0x07,0x02,0x00] +# GFX942: v_cvt_sr_fp8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xa4,0xd2,0x02,0x07,0x02,0x00] 0x01,0x00,0xa4,0xd2,0x02,0x07,0x02,0x00 -# GFX940: v_cvt_sr_fp8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa4,0xd2,0x02,0x06,0x01,0x00] +# GFX942: v_cvt_sr_fp8_f32 v1, s2, 3 ; encoding: [0x01,0x00,0xa4,0xd2,0x02,0x06,0x01,0x00] 0x01,0x00,0xa4,0xd2,0x02,0x06,0x01,0x00 -# GFX940: v_cvt_sr_fp8_f32 v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0xa4,0xd2,0x02,0x07,0x02,0x00] +# GFX942: v_cvt_sr_fp8_f32 v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0xa4,0xd2,0x02,0x07,0x02,0x00] 0x01,0x60,0xa4,0xd2,0x02,0x07,0x02,0x00 -# GFX940: v_cvt_sr_fp8_f32 v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0xa4,0xd2,0x02,0x07,0x02,0x00] +# GFX942: v_cvt_sr_fp8_f32 v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0xa4,0xd2,0x02,0x07,0x02,0x00] 0x01,0x40,0xa4,0xd2,0x02,0x07,0x02,0x00 -# GFX940: v_cvt_sr_fp8_f32 v1, -|s2|, v3 ; encoding: [0x01,0x01,0xa4,0xd2,0x02,0x06,0x02,0x20] +# GFX942: v_cvt_sr_fp8_f32 v1, -|s2|, v3 ; encoding: [0x01,0x01,0xa4,0xd2,0x02,0x06,0x02,0x20] 0x01,0x01,0xa4,0xd2,0x02,0x06,0x02,0x20 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx940_flat.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx942_flat.txt similarity index 56% rename from llvm/test/MC/Disassembler/AMDGPU/gfx940_flat.txt rename to llvm/test/MC/Disassembler/AMDGPU/gfx942_flat.txt index 856fcd894638c..b9ea2c5ac466d 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx940_flat.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx942_flat.txt @@ -1,1057 +1,1057 @@ -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx940 -show-encoding -disassemble %s | FileCheck -check-prefix=GFX940 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx942 -show-encoding -disassemble %s | FileCheck -check-prefix=GFX942 %s -# GFX940: scratch_load_dword a2, v4, s6 ; encoding: [0x00,0x60,0x50,0xdc,0x04,0x00,0x86,0x02] +# GFX942: scratch_load_dword a2, v4, s6 ; encoding: [0x00,0x60,0x50,0xdc,0x04,0x00,0x86,0x02] 0x00,0x60,0x50,0xdc,0x04,0x00,0x86,0x02 -# GFX940: scratch_load_dword a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x50,0xdc,0x04,0x00,0x86,0x02] +# GFX942: scratch_load_dword a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x50,0xdc,0x04,0x00,0x86,0x02] 0x10,0x60,0x50,0xdc,0x04,0x00,0x86,0x02 -# GFX940: scratch_load_dword a2, v4, off ; encoding: [0x00,0x60,0x50,0xdc,0x04,0x00,0xff,0x02] +# GFX942: scratch_load_dword a2, v4, off ; encoding: [0x00,0x60,0x50,0xdc,0x04,0x00,0xff,0x02] 0x00,0x60,0x50,0xdc,0x04,0x00,0xff,0x02 -# GFX940: scratch_load_dword a2, v4, off offset:16 ; encoding: [0x10,0x60,0x50,0xdc,0x04,0x00,0xff,0x02] +# GFX942: scratch_load_dword a2, v4, off offset:16 ; encoding: [0x10,0x60,0x50,0xdc,0x04,0x00,0xff,0x02] 0x10,0x60,0x50,0xdc,0x04,0x00,0xff,0x02 -# GFX940: scratch_load_dword a2, off, s6 ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x86,0x02] +# GFX942: scratch_load_dword a2, off, s6 ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x86,0x02] 0x00,0x40,0x50,0xdc,0x00,0x00,0x86,0x02 -# GFX940: scratch_load_dword a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x50,0xdc,0x00,0x00,0x86,0x02] +# GFX942: scratch_load_dword a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x50,0xdc,0x00,0x00,0x86,0x02] 0x10,0x40,0x50,0xdc,0x00,0x00,0x86,0x02 -# GFX940: scratch_load_dword a2, off, off ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0xff,0x02] +# GFX942: scratch_load_dword a2, off, off ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0xff,0x02] 0x00,0x40,0x50,0xdc,0x00,0x00,0xff,0x02 -# GFX940: scratch_load_dword a2, off, off offset:16 ; encoding: [0x10,0x40,0x50,0xdc,0x00,0x00,0xff,0x02] +# GFX942: scratch_load_dword a2, off, off offset:16 ; encoding: [0x10,0x40,0x50,0xdc,0x00,0x00,0xff,0x02] 0x10,0x40,0x50,0xdc,0x00,0x00,0xff,0x02 -# GFX940: scratch_load_dword v2, v4, s6 ; encoding: [0x00,0x60,0x50,0xdc,0x04,0x00,0x06,0x02] +# GFX942: scratch_load_dword v2, v4, s6 ; encoding: [0x00,0x60,0x50,0xdc,0x04,0x00,0x06,0x02] 0x00,0x60,0x50,0xdc,0x04,0x00,0x06,0x02 -# GFX940: scratch_load_dword v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x50,0xdc,0x04,0x00,0x06,0x02] +# GFX942: scratch_load_dword v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x50,0xdc,0x04,0x00,0x06,0x02] 0x10,0x60,0x50,0xdc,0x04,0x00,0x06,0x02 -# GFX940: scratch_load_dword v2, v4, off ; encoding: [0x00,0x60,0x50,0xdc,0x04,0x00,0x7f,0x02] +# GFX942: scratch_load_dword v2, v4, off ; encoding: [0x00,0x60,0x50,0xdc,0x04,0x00,0x7f,0x02] 0x00,0x60,0x50,0xdc,0x04,0x00,0x7f,0x02 -# GFX940: scratch_load_dword v2, v4, off offset:16 ; encoding: [0x10,0x60,0x50,0xdc,0x04,0x00,0x7f,0x02] +# GFX942: scratch_load_dword v2, v4, off offset:16 ; encoding: [0x10,0x60,0x50,0xdc,0x04,0x00,0x7f,0x02] 0x10,0x60,0x50,0xdc,0x04,0x00,0x7f,0x02 -# GFX940: scratch_load_dword v2, off, s6 ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x06,0x02] +# GFX942: scratch_load_dword v2, off, s6 ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x06,0x02] 0x00,0x40,0x50,0xdc,0x00,0x00,0x06,0x02 -# GFX940: scratch_load_dword v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x50,0xdc,0x00,0x00,0x06,0x02] +# GFX942: scratch_load_dword v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x50,0xdc,0x00,0x00,0x06,0x02] 0x10,0x40,0x50,0xdc,0x00,0x00,0x06,0x02 -# GFX940: scratch_load_dword v2, off, off ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x7f,0x02] +# GFX942: scratch_load_dword v2, off, off ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x7f,0x02] 0x00,0x40,0x50,0xdc,0x00,0x00,0x7f,0x02 -# GFX940: scratch_load_dword v2, off, off offset:16 ; encoding: [0x10,0x40,0x50,0xdc,0x00,0x00,0x7f,0x02] +# GFX942: scratch_load_dword v2, off, off offset:16 ; encoding: [0x10,0x40,0x50,0xdc,0x00,0x00,0x7f,0x02] 0x10,0x40,0x50,0xdc,0x00,0x00,0x7f,0x02 -# GFX940: scratch_load_dwordx2 a[2:3], v4, s6 ; encoding: [0x00,0x60,0x54,0xdc,0x04,0x00,0x86,0x02] +# GFX942: scratch_load_dwordx2 a[2:3], v4, s6 ; encoding: [0x00,0x60,0x54,0xdc,0x04,0x00,0x86,0x02] 0x00,0x60,0x54,0xdc,0x04,0x00,0x86,0x02 -# GFX940: scratch_load_dwordx2 a[2:3], v4, s6 offset:16 ; encoding: [0x10,0x60,0x54,0xdc,0x04,0x00,0x86,0x02] +# GFX942: scratch_load_dwordx2 a[2:3], v4, s6 offset:16 ; encoding: [0x10,0x60,0x54,0xdc,0x04,0x00,0x86,0x02] 0x10,0x60,0x54,0xdc,0x04,0x00,0x86,0x02 -# GFX940: scratch_load_dwordx2 a[2:3], v4, off ; encoding: [0x00,0x60,0x54,0xdc,0x04,0x00,0xff,0x02] +# GFX942: scratch_load_dwordx2 a[2:3], v4, off ; encoding: [0x00,0x60,0x54,0xdc,0x04,0x00,0xff,0x02] 0x00,0x60,0x54,0xdc,0x04,0x00,0xff,0x02 -# GFX940: scratch_load_dwordx2 a[2:3], v4, off offset:16 ; encoding: [0x10,0x60,0x54,0xdc,0x04,0x00,0xff,0x02] +# GFX942: scratch_load_dwordx2 a[2:3], v4, off offset:16 ; encoding: [0x10,0x60,0x54,0xdc,0x04,0x00,0xff,0x02] 0x10,0x60,0x54,0xdc,0x04,0x00,0xff,0x02 -# GFX940: scratch_load_dwordx2 a[2:3], off, s6 ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0x86,0x02] +# GFX942: scratch_load_dwordx2 a[2:3], off, s6 ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0x86,0x02] 0x00,0x40,0x54,0xdc,0x00,0x00,0x86,0x02 -# GFX940: scratch_load_dwordx2 a[2:3], off, s6 offset:16 ; encoding: [0x10,0x40,0x54,0xdc,0x00,0x00,0x86,0x02] +# GFX942: scratch_load_dwordx2 a[2:3], off, s6 offset:16 ; encoding: [0x10,0x40,0x54,0xdc,0x00,0x00,0x86,0x02] 0x10,0x40,0x54,0xdc,0x00,0x00,0x86,0x02 -# GFX940: scratch_load_dwordx2 a[2:3], off, off ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0xff,0x02] +# GFX942: scratch_load_dwordx2 a[2:3], off, off ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0xff,0x02] 0x00,0x40,0x54,0xdc,0x00,0x00,0xff,0x02 -# GFX940: scratch_load_dwordx2 a[2:3], off, off offset:16 ; encoding: [0x10,0x40,0x54,0xdc,0x00,0x00,0xff,0x02] +# GFX942: scratch_load_dwordx2 a[2:3], off, off offset:16 ; encoding: [0x10,0x40,0x54,0xdc,0x00,0x00,0xff,0x02] 0x10,0x40,0x54,0xdc,0x00,0x00,0xff,0x02 -# GFX940: scratch_load_dwordx2 v[2:3], v4, s6 ; encoding: [0x00,0x60,0x54,0xdc,0x04,0x00,0x06,0x02] +# GFX942: scratch_load_dwordx2 v[2:3], v4, s6 ; encoding: [0x00,0x60,0x54,0xdc,0x04,0x00,0x06,0x02] 0x00,0x60,0x54,0xdc,0x04,0x00,0x06,0x02 -# GFX940: scratch_load_dwordx2 v[2:3], v4, s6 offset:16 ; encoding: [0x10,0x60,0x54,0xdc,0x04,0x00,0x06,0x02] +# GFX942: scratch_load_dwordx2 v[2:3], v4, s6 offset:16 ; encoding: [0x10,0x60,0x54,0xdc,0x04,0x00,0x06,0x02] 0x10,0x60,0x54,0xdc,0x04,0x00,0x06,0x02 -# GFX940: scratch_load_dwordx2 v[2:3], v4, off ; encoding: [0x00,0x60,0x54,0xdc,0x04,0x00,0x7f,0x02] +# GFX942: scratch_load_dwordx2 v[2:3], v4, off ; encoding: [0x00,0x60,0x54,0xdc,0x04,0x00,0x7f,0x02] 0x00,0x60,0x54,0xdc,0x04,0x00,0x7f,0x02 -# GFX940: scratch_load_dwordx2 v[2:3], v4, off offset:16 ; encoding: [0x10,0x60,0x54,0xdc,0x04,0x00,0x7f,0x02] +# GFX942: scratch_load_dwordx2 v[2:3], v4, off offset:16 ; encoding: [0x10,0x60,0x54,0xdc,0x04,0x00,0x7f,0x02] 0x10,0x60,0x54,0xdc,0x04,0x00,0x7f,0x02 -# GFX940: scratch_load_dwordx2 v[2:3], off, s6 ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0x06,0x02] +# GFX942: scratch_load_dwordx2 v[2:3], off, s6 ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0x06,0x02] 0x00,0x40,0x54,0xdc,0x00,0x00,0x06,0x02 -# GFX940: scratch_load_dwordx2 v[2:3], off, s6 offset:16 ; encoding: [0x10,0x40,0x54,0xdc,0x00,0x00,0x06,0x02] +# GFX942: scratch_load_dwordx2 v[2:3], off, s6 offset:16 ; encoding: [0x10,0x40,0x54,0xdc,0x00,0x00,0x06,0x02] 0x10,0x40,0x54,0xdc,0x00,0x00,0x06,0x02 -# GFX940: scratch_load_dwordx2 v[2:3], off, off ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0x7f,0x02] +# GFX942: scratch_load_dwordx2 v[2:3], off, off ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0x7f,0x02] 0x00,0x40,0x54,0xdc,0x00,0x00,0x7f,0x02 -# GFX940: scratch_load_dwordx2 v[2:3], off, off offset:16 ; encoding: [0x10,0x40,0x54,0xdc,0x00,0x00,0x7f,0x02] +# GFX942: scratch_load_dwordx2 v[2:3], off, off offset:16 ; encoding: [0x10,0x40,0x54,0xdc,0x00,0x00,0x7f,0x02] 0x10,0x40,0x54,0xdc,0x00,0x00,0x7f,0x02 -# GFX940: scratch_load_dwordx3 a[2:4], v4, s6 ; encoding: [0x00,0x60,0x58,0xdc,0x04,0x00,0x86,0x02] +# GFX942: scratch_load_dwordx3 a[2:4], v4, s6 ; encoding: [0x00,0x60,0x58,0xdc,0x04,0x00,0x86,0x02] 0x00,0x60,0x58,0xdc,0x04,0x00,0x86,0x02 -# GFX940: scratch_load_dwordx3 a[2:4], v4, s6 offset:16 ; encoding: [0x10,0x60,0x58,0xdc,0x04,0x00,0x86,0x02] +# GFX942: scratch_load_dwordx3 a[2:4], v4, s6 offset:16 ; encoding: [0x10,0x60,0x58,0xdc,0x04,0x00,0x86,0x02] 0x10,0x60,0x58,0xdc,0x04,0x00,0x86,0x02 -# GFX940: scratch_load_dwordx3 a[2:4], v4, off ; encoding: [0x00,0x60,0x58,0xdc,0x04,0x00,0xff,0x02] +# GFX942: scratch_load_dwordx3 a[2:4], v4, off ; encoding: [0x00,0x60,0x58,0xdc,0x04,0x00,0xff,0x02] 0x00,0x60,0x58,0xdc,0x04,0x00,0xff,0x02 -# GFX940: scratch_load_dwordx3 a[2:4], v4, off offset:16 ; encoding: [0x10,0x60,0x58,0xdc,0x04,0x00,0xff,0x02] +# GFX942: scratch_load_dwordx3 a[2:4], v4, off offset:16 ; encoding: [0x10,0x60,0x58,0xdc,0x04,0x00,0xff,0x02] 0x10,0x60,0x58,0xdc,0x04,0x00,0xff,0x02 -# GFX940: scratch_load_dwordx3 a[2:4], off, s6 ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0x86,0x02] +# GFX942: scratch_load_dwordx3 a[2:4], off, s6 ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0x86,0x02] 0x00,0x40,0x58,0xdc,0x00,0x00,0x86,0x02 -# GFX940: scratch_load_dwordx3 a[2:4], off, s6 offset:16 ; encoding: [0x10,0x40,0x58,0xdc,0x00,0x00,0x86,0x02] +# GFX942: scratch_load_dwordx3 a[2:4], off, s6 offset:16 ; encoding: [0x10,0x40,0x58,0xdc,0x00,0x00,0x86,0x02] 0x10,0x40,0x58,0xdc,0x00,0x00,0x86,0x02 -# GFX940: scratch_load_dwordx3 a[2:4], off, off ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0xff,0x02] +# GFX942: scratch_load_dwordx3 a[2:4], off, off ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0xff,0x02] 0x00,0x40,0x58,0xdc,0x00,0x00,0xff,0x02 -# GFX940: scratch_load_dwordx3 a[2:4], off, off offset:16 ; encoding: [0x10,0x40,0x58,0xdc,0x00,0x00,0xff,0x02] +# GFX942: scratch_load_dwordx3 a[2:4], off, off offset:16 ; encoding: [0x10,0x40,0x58,0xdc,0x00,0x00,0xff,0x02] 0x10,0x40,0x58,0xdc,0x00,0x00,0xff,0x02 -# GFX940: scratch_load_dwordx3 v[2:4], v4, s6 ; encoding: [0x00,0x60,0x58,0xdc,0x04,0x00,0x06,0x02] +# GFX942: scratch_load_dwordx3 v[2:4], v4, s6 ; encoding: [0x00,0x60,0x58,0xdc,0x04,0x00,0x06,0x02] 0x00,0x60,0x58,0xdc,0x04,0x00,0x06,0x02 -# GFX940: scratch_load_dwordx3 v[2:4], v4, s6 offset:16 ; encoding: [0x10,0x60,0x58,0xdc,0x04,0x00,0x06,0x02] +# GFX942: scratch_load_dwordx3 v[2:4], v4, s6 offset:16 ; encoding: [0x10,0x60,0x58,0xdc,0x04,0x00,0x06,0x02] 0x10,0x60,0x58,0xdc,0x04,0x00,0x06,0x02 -# GFX940: scratch_load_dwordx3 v[2:4], v4, off ; encoding: [0x00,0x60,0x58,0xdc,0x04,0x00,0x7f,0x02] +# GFX942: scratch_load_dwordx3 v[2:4], v4, off ; encoding: [0x00,0x60,0x58,0xdc,0x04,0x00,0x7f,0x02] 0x00,0x60,0x58,0xdc,0x04,0x00,0x7f,0x02 -# GFX940: scratch_load_dwordx3 v[2:4], v4, off offset:16 ; encoding: [0x10,0x60,0x58,0xdc,0x04,0x00,0x7f,0x02] +# GFX942: scratch_load_dwordx3 v[2:4], v4, off offset:16 ; encoding: [0x10,0x60,0x58,0xdc,0x04,0x00,0x7f,0x02] 0x10,0x60,0x58,0xdc,0x04,0x00,0x7f,0x02 -# GFX940: scratch_load_dwordx3 v[2:4], off, s6 ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0x06,0x02] +# GFX942: scratch_load_dwordx3 v[2:4], off, s6 ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0x06,0x02] 0x00,0x40,0x58,0xdc,0x00,0x00,0x06,0x02 -# GFX940: scratch_load_dwordx3 v[2:4], off, s6 offset:16 ; encoding: [0x10,0x40,0x58,0xdc,0x00,0x00,0x06,0x02] +# GFX942: scratch_load_dwordx3 v[2:4], off, s6 offset:16 ; encoding: [0x10,0x40,0x58,0xdc,0x00,0x00,0x06,0x02] 0x10,0x40,0x58,0xdc,0x00,0x00,0x06,0x02 -# GFX940: scratch_load_dwordx3 v[2:4], off, off ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0x7f,0x02] +# GFX942: scratch_load_dwordx3 v[2:4], off, off ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0x7f,0x02] 0x00,0x40,0x58,0xdc,0x00,0x00,0x7f,0x02 -# GFX940: scratch_load_dwordx3 v[2:4], off, off offset:16 ; encoding: [0x10,0x40,0x58,0xdc,0x00,0x00,0x7f,0x02] +# GFX942: scratch_load_dwordx3 v[2:4], off, off offset:16 ; encoding: [0x10,0x40,0x58,0xdc,0x00,0x00,0x7f,0x02] 0x10,0x40,0x58,0xdc,0x00,0x00,0x7f,0x02 -# GFX940: scratch_load_dwordx4 a[2:5], v4, s6 ; encoding: [0x00,0x60,0x5c,0xdc,0x04,0x00,0x86,0x02] +# GFX942: scratch_load_dwordx4 a[2:5], v4, s6 ; encoding: [0x00,0x60,0x5c,0xdc,0x04,0x00,0x86,0x02] 0x00,0x60,0x5c,0xdc,0x04,0x00,0x86,0x02 -# GFX940: scratch_load_dwordx4 a[2:5], v4, s6 offset:16 ; encoding: [0x10,0x60,0x5c,0xdc,0x04,0x00,0x86,0x02] +# GFX942: scratch_load_dwordx4 a[2:5], v4, s6 offset:16 ; encoding: [0x10,0x60,0x5c,0xdc,0x04,0x00,0x86,0x02] 0x10,0x60,0x5c,0xdc,0x04,0x00,0x86,0x02 -# GFX940: scratch_load_dwordx4 a[2:5], v4, off ; encoding: [0x00,0x60,0x5c,0xdc,0x04,0x00,0xff,0x02] +# GFX942: scratch_load_dwordx4 a[2:5], v4, off ; encoding: [0x00,0x60,0x5c,0xdc,0x04,0x00,0xff,0x02] 0x00,0x60,0x5c,0xdc,0x04,0x00,0xff,0x02 -# GFX940: scratch_load_dwordx4 a[2:5], v4, off offset:16 ; encoding: [0x10,0x60,0x5c,0xdc,0x04,0x00,0xff,0x02] +# GFX942: scratch_load_dwordx4 a[2:5], v4, off offset:16 ; encoding: [0x10,0x60,0x5c,0xdc,0x04,0x00,0xff,0x02] 0x10,0x60,0x5c,0xdc,0x04,0x00,0xff,0x02 -# GFX940: scratch_load_dwordx4 a[2:5], off, s6 ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x86,0x02] +# GFX942: scratch_load_dwordx4 a[2:5], off, s6 ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x86,0x02] 0x00,0x40,0x5c,0xdc,0x00,0x00,0x86,0x02 -# GFX940: scratch_load_dwordx4 a[2:5], off, s6 offset:16 ; encoding: [0x10,0x40,0x5c,0xdc,0x00,0x00,0x86,0x02] +# GFX942: scratch_load_dwordx4 a[2:5], off, s6 offset:16 ; encoding: [0x10,0x40,0x5c,0xdc,0x00,0x00,0x86,0x02] 0x10,0x40,0x5c,0xdc,0x00,0x00,0x86,0x02 -# GFX940: scratch_load_dwordx4 a[2:5], off, off ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0xff,0x02] +# GFX942: scratch_load_dwordx4 a[2:5], off, off ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0xff,0x02] 0x00,0x40,0x5c,0xdc,0x00,0x00,0xff,0x02 -# GFX940: scratch_load_dwordx4 a[2:5], off, off offset:16 ; encoding: [0x10,0x40,0x5c,0xdc,0x00,0x00,0xff,0x02] +# GFX942: scratch_load_dwordx4 a[2:5], off, off offset:16 ; encoding: [0x10,0x40,0x5c,0xdc,0x00,0x00,0xff,0x02] 0x10,0x40,0x5c,0xdc,0x00,0x00,0xff,0x02 -# GFX940: scratch_load_dwordx4 v[2:5], v4, s6 ; encoding: [0x00,0x60,0x5c,0xdc,0x04,0x00,0x06,0x02] +# GFX942: scratch_load_dwordx4 v[2:5], v4, s6 ; encoding: [0x00,0x60,0x5c,0xdc,0x04,0x00,0x06,0x02] 0x00,0x60,0x5c,0xdc,0x04,0x00,0x06,0x02 -# GFX940: scratch_load_dwordx4 v[2:5], v4, s6 offset:16 ; encoding: [0x10,0x60,0x5c,0xdc,0x04,0x00,0x06,0x02] +# GFX942: scratch_load_dwordx4 v[2:5], v4, s6 offset:16 ; encoding: [0x10,0x60,0x5c,0xdc,0x04,0x00,0x06,0x02] 0x10,0x60,0x5c,0xdc,0x04,0x00,0x06,0x02 -# GFX940: scratch_load_dwordx4 v[2:5], v4, off ; encoding: [0x00,0x60,0x5c,0xdc,0x04,0x00,0x7f,0x02] +# GFX942: scratch_load_dwordx4 v[2:5], v4, off ; encoding: [0x00,0x60,0x5c,0xdc,0x04,0x00,0x7f,0x02] 0x00,0x60,0x5c,0xdc,0x04,0x00,0x7f,0x02 -# GFX940: scratch_load_dwordx4 v[2:5], v4, off offset:16 ; encoding: [0x10,0x60,0x5c,0xdc,0x04,0x00,0x7f,0x02] +# GFX942: scratch_load_dwordx4 v[2:5], v4, off offset:16 ; encoding: [0x10,0x60,0x5c,0xdc,0x04,0x00,0x7f,0x02] 0x10,0x60,0x5c,0xdc,0x04,0x00,0x7f,0x02 -# GFX940: scratch_load_dwordx4 v[2:5], off, s6 ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x06,0x02] +# GFX942: scratch_load_dwordx4 v[2:5], off, s6 ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x06,0x02] 0x00,0x40,0x5c,0xdc,0x00,0x00,0x06,0x02 -# GFX940: scratch_load_dwordx4 v[2:5], off, s6 offset:16 ; encoding: [0x10,0x40,0x5c,0xdc,0x00,0x00,0x06,0x02] +# GFX942: scratch_load_dwordx4 v[2:5], off, s6 offset:16 ; encoding: [0x10,0x40,0x5c,0xdc,0x00,0x00,0x06,0x02] 0x10,0x40,0x5c,0xdc,0x00,0x00,0x06,0x02 -# GFX940: scratch_load_dwordx4 v[2:5], off, off ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x7f,0x02] +# GFX942: scratch_load_dwordx4 v[2:5], off, off ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x7f,0x02] 0x00,0x40,0x5c,0xdc,0x00,0x00,0x7f,0x02 -# GFX940: scratch_load_dwordx4 v[2:5], off, off offset:16 ; encoding: [0x10,0x40,0x5c,0xdc,0x00,0x00,0x7f,0x02] +# GFX942: scratch_load_dwordx4 v[2:5], off, off offset:16 ; encoding: [0x10,0x40,0x5c,0xdc,0x00,0x00,0x7f,0x02] 0x10,0x40,0x5c,0xdc,0x00,0x00,0x7f,0x02 -# GFX940: scratch_load_sbyte a2, v4, s6 ; encoding: [0x00,0x60,0x44,0xdc,0x04,0x00,0x86,0x02] +# GFX942: scratch_load_sbyte a2, v4, s6 ; encoding: [0x00,0x60,0x44,0xdc,0x04,0x00,0x86,0x02] 0x00,0x60,0x44,0xdc,0x04,0x00,0x86,0x02 -# GFX940: scratch_load_sbyte a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x44,0xdc,0x04,0x00,0x86,0x02] +# GFX942: scratch_load_sbyte a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x44,0xdc,0x04,0x00,0x86,0x02] 0x10,0x60,0x44,0xdc,0x04,0x00,0x86,0x02 -# GFX940: scratch_load_sbyte a2, v4, off ; encoding: [0x00,0x60,0x44,0xdc,0x04,0x00,0xff,0x02] +# GFX942: scratch_load_sbyte a2, v4, off ; encoding: [0x00,0x60,0x44,0xdc,0x04,0x00,0xff,0x02] 0x00,0x60,0x44,0xdc,0x04,0x00,0xff,0x02 -# GFX940: scratch_load_sbyte a2, v4, off offset:16 ; encoding: [0x10,0x60,0x44,0xdc,0x04,0x00,0xff,0x02] +# GFX942: scratch_load_sbyte a2, v4, off offset:16 ; encoding: [0x10,0x60,0x44,0xdc,0x04,0x00,0xff,0x02] 0x10,0x60,0x44,0xdc,0x04,0x00,0xff,0x02 -# GFX940: scratch_load_sbyte a2, off, s6 ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0x86,0x02] +# GFX942: scratch_load_sbyte a2, off, s6 ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0x86,0x02] 0x00,0x40,0x44,0xdc,0x00,0x00,0x86,0x02 -# GFX940: scratch_load_sbyte a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x44,0xdc,0x00,0x00,0x86,0x02] +# GFX942: scratch_load_sbyte a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x44,0xdc,0x00,0x00,0x86,0x02] 0x10,0x40,0x44,0xdc,0x00,0x00,0x86,0x02 -# GFX940: scratch_load_sbyte a2, off, off ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0xff,0x02] +# GFX942: scratch_load_sbyte a2, off, off ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0xff,0x02] 0x00,0x40,0x44,0xdc,0x00,0x00,0xff,0x02 -# GFX940: scratch_load_sbyte a2, off, off offset:16 ; encoding: [0x10,0x40,0x44,0xdc,0x00,0x00,0xff,0x02] +# GFX942: scratch_load_sbyte a2, off, off offset:16 ; encoding: [0x10,0x40,0x44,0xdc,0x00,0x00,0xff,0x02] 0x10,0x40,0x44,0xdc,0x00,0x00,0xff,0x02 -# GFX940: scratch_load_sbyte v2, v4, s6 ; encoding: [0x00,0x60,0x44,0xdc,0x04,0x00,0x06,0x02] +# GFX942: scratch_load_sbyte v2, v4, s6 ; encoding: [0x00,0x60,0x44,0xdc,0x04,0x00,0x06,0x02] 0x00,0x60,0x44,0xdc,0x04,0x00,0x06,0x02 -# GFX940: scratch_load_sbyte v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x44,0xdc,0x04,0x00,0x06,0x02] +# GFX942: scratch_load_sbyte v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x44,0xdc,0x04,0x00,0x06,0x02] 0x10,0x60,0x44,0xdc,0x04,0x00,0x06,0x02 -# GFX940: scratch_load_sbyte v2, v4, off ; encoding: [0x00,0x60,0x44,0xdc,0x04,0x00,0x7f,0x02] +# GFX942: scratch_load_sbyte v2, v4, off ; encoding: [0x00,0x60,0x44,0xdc,0x04,0x00,0x7f,0x02] 0x00,0x60,0x44,0xdc,0x04,0x00,0x7f,0x02 -# GFX940: scratch_load_sbyte v2, v4, off offset:16 ; encoding: [0x10,0x60,0x44,0xdc,0x04,0x00,0x7f,0x02] +# GFX942: scratch_load_sbyte v2, v4, off offset:16 ; encoding: [0x10,0x60,0x44,0xdc,0x04,0x00,0x7f,0x02] 0x10,0x60,0x44,0xdc,0x04,0x00,0x7f,0x02 -# GFX940: scratch_load_sbyte v2, off, s6 ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0x06,0x02] +# GFX942: scratch_load_sbyte v2, off, s6 ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0x06,0x02] 0x00,0x40,0x44,0xdc,0x00,0x00,0x06,0x02 -# GFX940: scratch_load_sbyte v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x44,0xdc,0x00,0x00,0x06,0x02] +# GFX942: scratch_load_sbyte v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x44,0xdc,0x00,0x00,0x06,0x02] 0x10,0x40,0x44,0xdc,0x00,0x00,0x06,0x02 -# GFX940: scratch_load_sbyte v2, off, off ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0x7f,0x02] +# GFX942: scratch_load_sbyte v2, off, off ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0x7f,0x02] 0x00,0x40,0x44,0xdc,0x00,0x00,0x7f,0x02 -# GFX940: scratch_load_sbyte v2, off, off offset:16 ; encoding: [0x10,0x40,0x44,0xdc,0x00,0x00,0x7f,0x02] +# GFX942: scratch_load_sbyte v2, off, off offset:16 ; encoding: [0x10,0x40,0x44,0xdc,0x00,0x00,0x7f,0x02] 0x10,0x40,0x44,0xdc,0x00,0x00,0x7f,0x02 -# GFX940: scratch_load_sbyte_d16 a2, v4, s6 ; encoding: [0x00,0x60,0x88,0xdc,0x04,0x00,0x86,0x02] +# GFX942: scratch_load_sbyte_d16 a2, v4, s6 ; encoding: [0x00,0x60,0x88,0xdc,0x04,0x00,0x86,0x02] 0x00,0x60,0x88,0xdc,0x04,0x00,0x86,0x02 -# GFX940: scratch_load_sbyte_d16 a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x88,0xdc,0x04,0x00,0x86,0x02] +# GFX942: scratch_load_sbyte_d16 a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x88,0xdc,0x04,0x00,0x86,0x02] 0x10,0x60,0x88,0xdc,0x04,0x00,0x86,0x02 -# GFX940: scratch_load_sbyte_d16 a2, v4, off ; encoding: [0x00,0x60,0x88,0xdc,0x04,0x00,0xff,0x02] +# GFX942: scratch_load_sbyte_d16 a2, v4, off ; encoding: [0x00,0x60,0x88,0xdc,0x04,0x00,0xff,0x02] 0x00,0x60,0x88,0xdc,0x04,0x00,0xff,0x02 -# GFX940: scratch_load_sbyte_d16 a2, v4, off offset:16 ; encoding: [0x10,0x60,0x88,0xdc,0x04,0x00,0xff,0x02] +# GFX942: scratch_load_sbyte_d16 a2, v4, off offset:16 ; encoding: [0x10,0x60,0x88,0xdc,0x04,0x00,0xff,0x02] 0x10,0x60,0x88,0xdc,0x04,0x00,0xff,0x02 -# GFX940: scratch_load_sbyte_d16 a2, off, s6 ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0x86,0x02] +# GFX942: scratch_load_sbyte_d16 a2, off, s6 ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0x86,0x02] 0x00,0x40,0x88,0xdc,0x00,0x00,0x86,0x02 -# GFX940: scratch_load_sbyte_d16 a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x88,0xdc,0x00,0x00,0x86,0x02] +# GFX942: scratch_load_sbyte_d16 a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x88,0xdc,0x00,0x00,0x86,0x02] 0x10,0x40,0x88,0xdc,0x00,0x00,0x86,0x02 -# GFX940: scratch_load_sbyte_d16 a2, off, off ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0xff,0x02] +# GFX942: scratch_load_sbyte_d16 a2, off, off ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0xff,0x02] 0x00,0x40,0x88,0xdc,0x00,0x00,0xff,0x02 -# GFX940: scratch_load_sbyte_d16 a2, off, off offset:16 ; encoding: [0x10,0x40,0x88,0xdc,0x00,0x00,0xff,0x02] +# GFX942: scratch_load_sbyte_d16 a2, off, off offset:16 ; encoding: [0x10,0x40,0x88,0xdc,0x00,0x00,0xff,0x02] 0x10,0x40,0x88,0xdc,0x00,0x00,0xff,0x02 -# GFX940: scratch_load_sbyte_d16 v2, v4, s6 ; encoding: [0x00,0x60,0x88,0xdc,0x04,0x00,0x06,0x02] +# GFX942: scratch_load_sbyte_d16 v2, v4, s6 ; encoding: [0x00,0x60,0x88,0xdc,0x04,0x00,0x06,0x02] 0x00,0x60,0x88,0xdc,0x04,0x00,0x06,0x02 -# GFX940: scratch_load_sbyte_d16 v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x88,0xdc,0x04,0x00,0x06,0x02] +# GFX942: scratch_load_sbyte_d16 v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x88,0xdc,0x04,0x00,0x06,0x02] 0x10,0x60,0x88,0xdc,0x04,0x00,0x06,0x02 -# GFX940: scratch_load_sbyte_d16 v2, v4, off ; encoding: [0x00,0x60,0x88,0xdc,0x04,0x00,0x7f,0x02] +# GFX942: scratch_load_sbyte_d16 v2, v4, off ; encoding: [0x00,0x60,0x88,0xdc,0x04,0x00,0x7f,0x02] 0x00,0x60,0x88,0xdc,0x04,0x00,0x7f,0x02 -# GFX940: scratch_load_sbyte_d16 v2, v4, off offset:16 ; encoding: [0x10,0x60,0x88,0xdc,0x04,0x00,0x7f,0x02] +# GFX942: scratch_load_sbyte_d16 v2, v4, off offset:16 ; encoding: [0x10,0x60,0x88,0xdc,0x04,0x00,0x7f,0x02] 0x10,0x60,0x88,0xdc,0x04,0x00,0x7f,0x02 -# GFX940: scratch_load_sbyte_d16 v2, off, s6 ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0x06,0x02] +# GFX942: scratch_load_sbyte_d16 v2, off, s6 ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0x06,0x02] 0x00,0x40,0x88,0xdc,0x00,0x00,0x06,0x02 -# GFX940: scratch_load_sbyte_d16 v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x88,0xdc,0x00,0x00,0x06,0x02] +# GFX942: scratch_load_sbyte_d16 v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x88,0xdc,0x00,0x00,0x06,0x02] 0x10,0x40,0x88,0xdc,0x00,0x00,0x06,0x02 -# GFX940: scratch_load_sbyte_d16 v2, off, off ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0x7f,0x02] +# GFX942: scratch_load_sbyte_d16 v2, off, off ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0x7f,0x02] 0x00,0x40,0x88,0xdc,0x00,0x00,0x7f,0x02 -# GFX940: scratch_load_sbyte_d16 v2, off, off offset:16 ; encoding: [0x10,0x40,0x88,0xdc,0x00,0x00,0x7f,0x02] +# GFX942: scratch_load_sbyte_d16 v2, off, off offset:16 ; encoding: [0x10,0x40,0x88,0xdc,0x00,0x00,0x7f,0x02] 0x10,0x40,0x88,0xdc,0x00,0x00,0x7f,0x02 -# GFX940: scratch_load_sbyte_d16_hi a2, v4, s6 ; encoding: [0x00,0x60,0x8c,0xdc,0x04,0x00,0x86,0x02] +# GFX942: scratch_load_sbyte_d16_hi a2, v4, s6 ; encoding: [0x00,0x60,0x8c,0xdc,0x04,0x00,0x86,0x02] 0x00,0x60,0x8c,0xdc,0x04,0x00,0x86,0x02 -# GFX940: scratch_load_sbyte_d16_hi a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x8c,0xdc,0x04,0x00,0x86,0x02] +# GFX942: scratch_load_sbyte_d16_hi a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x8c,0xdc,0x04,0x00,0x86,0x02] 0x10,0x60,0x8c,0xdc,0x04,0x00,0x86,0x02 -# GFX940: scratch_load_sbyte_d16_hi a2, v4, off ; encoding: [0x00,0x60,0x8c,0xdc,0x04,0x00,0xff,0x02] +# GFX942: scratch_load_sbyte_d16_hi a2, v4, off ; encoding: [0x00,0x60,0x8c,0xdc,0x04,0x00,0xff,0x02] 0x00,0x60,0x8c,0xdc,0x04,0x00,0xff,0x02 -# GFX940: scratch_load_sbyte_d16_hi a2, v4, off offset:16 ; encoding: [0x10,0x60,0x8c,0xdc,0x04,0x00,0xff,0x02] +# GFX942: scratch_load_sbyte_d16_hi a2, v4, off offset:16 ; encoding: [0x10,0x60,0x8c,0xdc,0x04,0x00,0xff,0x02] 0x10,0x60,0x8c,0xdc,0x04,0x00,0xff,0x02 -# GFX940: scratch_load_sbyte_d16_hi a2, off, s6 ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x86,0x02] +# GFX942: scratch_load_sbyte_d16_hi a2, off, s6 ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x86,0x02] 0x00,0x40,0x8c,0xdc,0x00,0x00,0x86,0x02 -# GFX940: scratch_load_sbyte_d16_hi a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x8c,0xdc,0x00,0x00,0x86,0x02] +# GFX942: scratch_load_sbyte_d16_hi a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x8c,0xdc,0x00,0x00,0x86,0x02] 0x10,0x40,0x8c,0xdc,0x00,0x00,0x86,0x02 -# GFX940: scratch_load_sbyte_d16_hi a2, off, off ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0xff,0x02] +# GFX942: scratch_load_sbyte_d16_hi a2, off, off ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0xff,0x02] 0x00,0x40,0x8c,0xdc,0x00,0x00,0xff,0x02 -# GFX940: scratch_load_sbyte_d16_hi a2, off, off offset:16 ; encoding: [0x10,0x40,0x8c,0xdc,0x00,0x00,0xff,0x02] +# GFX942: scratch_load_sbyte_d16_hi a2, off, off offset:16 ; encoding: [0x10,0x40,0x8c,0xdc,0x00,0x00,0xff,0x02] 0x10,0x40,0x8c,0xdc,0x00,0x00,0xff,0x02 -# GFX940: scratch_load_sbyte_d16_hi v2, v4, s6 ; encoding: [0x00,0x60,0x8c,0xdc,0x04,0x00,0x06,0x02] +# GFX942: scratch_load_sbyte_d16_hi v2, v4, s6 ; encoding: [0x00,0x60,0x8c,0xdc,0x04,0x00,0x06,0x02] 0x00,0x60,0x8c,0xdc,0x04,0x00,0x06,0x02 -# GFX940: scratch_load_sbyte_d16_hi v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x8c,0xdc,0x04,0x00,0x06,0x02] +# GFX942: scratch_load_sbyte_d16_hi v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x8c,0xdc,0x04,0x00,0x06,0x02] 0x10,0x60,0x8c,0xdc,0x04,0x00,0x06,0x02 -# GFX940: scratch_load_sbyte_d16_hi v2, v4, off ; encoding: [0x00,0x60,0x8c,0xdc,0x04,0x00,0x7f,0x02] +# GFX942: scratch_load_sbyte_d16_hi v2, v4, off ; encoding: [0x00,0x60,0x8c,0xdc,0x04,0x00,0x7f,0x02] 0x00,0x60,0x8c,0xdc,0x04,0x00,0x7f,0x02 -# GFX940: scratch_load_sbyte_d16_hi v2, v4, off offset:16 ; encoding: [0x10,0x60,0x8c,0xdc,0x04,0x00,0x7f,0x02] +# GFX942: scratch_load_sbyte_d16_hi v2, v4, off offset:16 ; encoding: [0x10,0x60,0x8c,0xdc,0x04,0x00,0x7f,0x02] 0x10,0x60,0x8c,0xdc,0x04,0x00,0x7f,0x02 -# GFX940: scratch_load_sbyte_d16_hi v2, off, s6 ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x06,0x02] +# GFX942: scratch_load_sbyte_d16_hi v2, off, s6 ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x06,0x02] 0x00,0x40,0x8c,0xdc,0x00,0x00,0x06,0x02 -# GFX940: scratch_load_sbyte_d16_hi v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x8c,0xdc,0x00,0x00,0x06,0x02] +# GFX942: scratch_load_sbyte_d16_hi v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x8c,0xdc,0x00,0x00,0x06,0x02] 0x10,0x40,0x8c,0xdc,0x00,0x00,0x06,0x02 -# GFX940: scratch_load_sbyte_d16_hi v2, off, off ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x7f,0x02] +# GFX942: scratch_load_sbyte_d16_hi v2, off, off ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x7f,0x02] 0x00,0x40,0x8c,0xdc,0x00,0x00,0x7f,0x02 -# GFX940: scratch_load_sbyte_d16_hi v2, off, off offset:16 ; encoding: [0x10,0x40,0x8c,0xdc,0x00,0x00,0x7f,0x02] +# GFX942: scratch_load_sbyte_d16_hi v2, off, off offset:16 ; encoding: [0x10,0x40,0x8c,0xdc,0x00,0x00,0x7f,0x02] 0x10,0x40,0x8c,0xdc,0x00,0x00,0x7f,0x02 -# GFX940: scratch_load_short_d16 a2, v4, s6 ; encoding: [0x00,0x60,0x90,0xdc,0x04,0x00,0x86,0x02] +# GFX942: scratch_load_short_d16 a2, v4, s6 ; encoding: [0x00,0x60,0x90,0xdc,0x04,0x00,0x86,0x02] 0x00,0x60,0x90,0xdc,0x04,0x00,0x86,0x02 -# GFX940: scratch_load_short_d16 a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x90,0xdc,0x04,0x00,0x86,0x02] +# GFX942: scratch_load_short_d16 a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x90,0xdc,0x04,0x00,0x86,0x02] 0x10,0x60,0x90,0xdc,0x04,0x00,0x86,0x02 -# GFX940: scratch_load_short_d16 a2, v4, off ; encoding: [0x00,0x60,0x90,0xdc,0x04,0x00,0xff,0x02] +# GFX942: scratch_load_short_d16 a2, v4, off ; encoding: [0x00,0x60,0x90,0xdc,0x04,0x00,0xff,0x02] 0x00,0x60,0x90,0xdc,0x04,0x00,0xff,0x02 -# GFX940: scratch_load_short_d16 a2, v4, off offset:16 ; encoding: [0x10,0x60,0x90,0xdc,0x04,0x00,0xff,0x02] +# GFX942: scratch_load_short_d16 a2, v4, off offset:16 ; encoding: [0x10,0x60,0x90,0xdc,0x04,0x00,0xff,0x02] 0x10,0x60,0x90,0xdc,0x04,0x00,0xff,0x02 -# GFX940: scratch_load_short_d16 a2, off, s6 ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0x86,0x02] +# GFX942: scratch_load_short_d16 a2, off, s6 ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0x86,0x02] 0x00,0x40,0x90,0xdc,0x00,0x00,0x86,0x02 -# GFX940: scratch_load_short_d16 a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x90,0xdc,0x00,0x00,0x86,0x02] +# GFX942: scratch_load_short_d16 a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x90,0xdc,0x00,0x00,0x86,0x02] 0x10,0x40,0x90,0xdc,0x00,0x00,0x86,0x02 -# GFX940: scratch_load_short_d16 a2, off, off ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0xff,0x02] +# GFX942: scratch_load_short_d16 a2, off, off ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0xff,0x02] 0x00,0x40,0x90,0xdc,0x00,0x00,0xff,0x02 -# GFX940: scratch_load_short_d16 a2, off, off offset:16 ; encoding: [0x10,0x40,0x90,0xdc,0x00,0x00,0xff,0x02] +# GFX942: scratch_load_short_d16 a2, off, off offset:16 ; encoding: [0x10,0x40,0x90,0xdc,0x00,0x00,0xff,0x02] 0x10,0x40,0x90,0xdc,0x00,0x00,0xff,0x02 -# GFX940: scratch_load_short_d16 v2, v4, s6 ; encoding: [0x00,0x60,0x90,0xdc,0x04,0x00,0x06,0x02] +# GFX942: scratch_load_short_d16 v2, v4, s6 ; encoding: [0x00,0x60,0x90,0xdc,0x04,0x00,0x06,0x02] 0x00,0x60,0x90,0xdc,0x04,0x00,0x06,0x02 -# GFX940: scratch_load_short_d16 v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x90,0xdc,0x04,0x00,0x06,0x02] +# GFX942: scratch_load_short_d16 v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x90,0xdc,0x04,0x00,0x06,0x02] 0x10,0x60,0x90,0xdc,0x04,0x00,0x06,0x02 -# GFX940: scratch_load_short_d16 v2, v4, off ; encoding: [0x00,0x60,0x90,0xdc,0x04,0x00,0x7f,0x02] +# GFX942: scratch_load_short_d16 v2, v4, off ; encoding: [0x00,0x60,0x90,0xdc,0x04,0x00,0x7f,0x02] 0x00,0x60,0x90,0xdc,0x04,0x00,0x7f,0x02 -# GFX940: scratch_load_short_d16 v2, v4, off offset:16 ; encoding: [0x10,0x60,0x90,0xdc,0x04,0x00,0x7f,0x02] +# GFX942: scratch_load_short_d16 v2, v4, off offset:16 ; encoding: [0x10,0x60,0x90,0xdc,0x04,0x00,0x7f,0x02] 0x10,0x60,0x90,0xdc,0x04,0x00,0x7f,0x02 -# GFX940: scratch_load_short_d16 v2, off, s6 ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0x06,0x02] +# GFX942: scratch_load_short_d16 v2, off, s6 ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0x06,0x02] 0x00,0x40,0x90,0xdc,0x00,0x00,0x06,0x02 -# GFX940: scratch_load_short_d16 v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x90,0xdc,0x00,0x00,0x06,0x02] +# GFX942: scratch_load_short_d16 v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x90,0xdc,0x00,0x00,0x06,0x02] 0x10,0x40,0x90,0xdc,0x00,0x00,0x06,0x02 -# GFX940: scratch_load_short_d16 v2, off, off ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0x7f,0x02] +# GFX942: scratch_load_short_d16 v2, off, off ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0x7f,0x02] 0x00,0x40,0x90,0xdc,0x00,0x00,0x7f,0x02 -# GFX940: scratch_load_short_d16 v2, off, off offset:16 ; encoding: [0x10,0x40,0x90,0xdc,0x00,0x00,0x7f,0x02] +# GFX942: scratch_load_short_d16 v2, off, off offset:16 ; encoding: [0x10,0x40,0x90,0xdc,0x00,0x00,0x7f,0x02] 0x10,0x40,0x90,0xdc,0x00,0x00,0x7f,0x02 -# GFX940: scratch_load_short_d16_hi a2, v4, s6 ; encoding: [0x00,0x60,0x94,0xdc,0x04,0x00,0x86,0x02] +# GFX942: scratch_load_short_d16_hi a2, v4, s6 ; encoding: [0x00,0x60,0x94,0xdc,0x04,0x00,0x86,0x02] 0x00,0x60,0x94,0xdc,0x04,0x00,0x86,0x02 -# GFX940: scratch_load_short_d16_hi a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x94,0xdc,0x04,0x00,0x86,0x02] +# GFX942: scratch_load_short_d16_hi a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x94,0xdc,0x04,0x00,0x86,0x02] 0x10,0x60,0x94,0xdc,0x04,0x00,0x86,0x02 -# GFX940: scratch_load_short_d16_hi a2, v4, off ; encoding: [0x00,0x60,0x94,0xdc,0x04,0x00,0xff,0x02] +# GFX942: scratch_load_short_d16_hi a2, v4, off ; encoding: [0x00,0x60,0x94,0xdc,0x04,0x00,0xff,0x02] 0x00,0x60,0x94,0xdc,0x04,0x00,0xff,0x02 -# GFX940: scratch_load_short_d16_hi a2, v4, off offset:16 ; encoding: [0x10,0x60,0x94,0xdc,0x04,0x00,0xff,0x02] +# GFX942: scratch_load_short_d16_hi a2, v4, off offset:16 ; encoding: [0x10,0x60,0x94,0xdc,0x04,0x00,0xff,0x02] 0x10,0x60,0x94,0xdc,0x04,0x00,0xff,0x02 -# GFX940: scratch_load_short_d16_hi a2, off, s6 ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0x86,0x02] +# GFX942: scratch_load_short_d16_hi a2, off, s6 ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0x86,0x02] 0x00,0x40,0x94,0xdc,0x00,0x00,0x86,0x02 -# GFX940: scratch_load_short_d16_hi a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x94,0xdc,0x00,0x00,0x86,0x02] +# GFX942: scratch_load_short_d16_hi a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x94,0xdc,0x00,0x00,0x86,0x02] 0x10,0x40,0x94,0xdc,0x00,0x00,0x86,0x02 -# GFX940: scratch_load_short_d16_hi a2, off, off ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0xff,0x02] +# GFX942: scratch_load_short_d16_hi a2, off, off ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0xff,0x02] 0x00,0x40,0x94,0xdc,0x00,0x00,0xff,0x02 -# GFX940: scratch_load_short_d16_hi a2, off, off offset:16 ; encoding: [0x10,0x40,0x94,0xdc,0x00,0x00,0xff,0x02] +# GFX942: scratch_load_short_d16_hi a2, off, off offset:16 ; encoding: [0x10,0x40,0x94,0xdc,0x00,0x00,0xff,0x02] 0x10,0x40,0x94,0xdc,0x00,0x00,0xff,0x02 -# GFX940: scratch_load_short_d16_hi v2, v4, s6 ; encoding: [0x00,0x60,0x94,0xdc,0x04,0x00,0x06,0x02] +# GFX942: scratch_load_short_d16_hi v2, v4, s6 ; encoding: [0x00,0x60,0x94,0xdc,0x04,0x00,0x06,0x02] 0x00,0x60,0x94,0xdc,0x04,0x00,0x06,0x02 -# GFX940: scratch_load_short_d16_hi v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x94,0xdc,0x04,0x00,0x06,0x02] +# GFX942: scratch_load_short_d16_hi v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x94,0xdc,0x04,0x00,0x06,0x02] 0x10,0x60,0x94,0xdc,0x04,0x00,0x06,0x02 -# GFX940: scratch_load_short_d16_hi v2, v4, off ; encoding: [0x00,0x60,0x94,0xdc,0x04,0x00,0x7f,0x02] +# GFX942: scratch_load_short_d16_hi v2, v4, off ; encoding: [0x00,0x60,0x94,0xdc,0x04,0x00,0x7f,0x02] 0x00,0x60,0x94,0xdc,0x04,0x00,0x7f,0x02 -# GFX940: scratch_load_short_d16_hi v2, v4, off offset:16 ; encoding: [0x10,0x60,0x94,0xdc,0x04,0x00,0x7f,0x02] +# GFX942: scratch_load_short_d16_hi v2, v4, off offset:16 ; encoding: [0x10,0x60,0x94,0xdc,0x04,0x00,0x7f,0x02] 0x10,0x60,0x94,0xdc,0x04,0x00,0x7f,0x02 -# GFX940: scratch_load_short_d16_hi v2, off, s6 ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0x06,0x02] +# GFX942: scratch_load_short_d16_hi v2, off, s6 ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0x06,0x02] 0x00,0x40,0x94,0xdc,0x00,0x00,0x06,0x02 -# GFX940: scratch_load_short_d16_hi v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x94,0xdc,0x00,0x00,0x06,0x02] +# GFX942: scratch_load_short_d16_hi v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x94,0xdc,0x00,0x00,0x06,0x02] 0x10,0x40,0x94,0xdc,0x00,0x00,0x06,0x02 -# GFX940: scratch_load_short_d16_hi v2, off, off ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0x7f,0x02] +# GFX942: scratch_load_short_d16_hi v2, off, off ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0x7f,0x02] 0x00,0x40,0x94,0xdc,0x00,0x00,0x7f,0x02 -# GFX940: scratch_load_short_d16_hi v2, off, off offset:16 ; encoding: [0x10,0x40,0x94,0xdc,0x00,0x00,0x7f,0x02] +# GFX942: scratch_load_short_d16_hi v2, off, off offset:16 ; encoding: [0x10,0x40,0x94,0xdc,0x00,0x00,0x7f,0x02] 0x10,0x40,0x94,0xdc,0x00,0x00,0x7f,0x02 -# GFX940: scratch_load_sshort a2, v4, s6 ; encoding: [0x00,0x60,0x4c,0xdc,0x04,0x00,0x86,0x02] +# GFX942: scratch_load_sshort a2, v4, s6 ; encoding: [0x00,0x60,0x4c,0xdc,0x04,0x00,0x86,0x02] 0x00,0x60,0x4c,0xdc,0x04,0x00,0x86,0x02 -# GFX940: scratch_load_sshort a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x4c,0xdc,0x04,0x00,0x86,0x02] +# GFX942: scratch_load_sshort a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x4c,0xdc,0x04,0x00,0x86,0x02] 0x10,0x60,0x4c,0xdc,0x04,0x00,0x86,0x02 -# GFX940: scratch_load_sshort a2, v4, off ; encoding: [0x00,0x60,0x4c,0xdc,0x04,0x00,0xff,0x02] +# GFX942: scratch_load_sshort a2, v4, off ; encoding: [0x00,0x60,0x4c,0xdc,0x04,0x00,0xff,0x02] 0x00,0x60,0x4c,0xdc,0x04,0x00,0xff,0x02 -# GFX940: scratch_load_sshort a2, v4, off offset:16 ; encoding: [0x10,0x60,0x4c,0xdc,0x04,0x00,0xff,0x02] +# GFX942: scratch_load_sshort a2, v4, off offset:16 ; encoding: [0x10,0x60,0x4c,0xdc,0x04,0x00,0xff,0x02] 0x10,0x60,0x4c,0xdc,0x04,0x00,0xff,0x02 -# GFX940: scratch_load_sshort a2, off, s6 ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x86,0x02] +# GFX942: scratch_load_sshort a2, off, s6 ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x86,0x02] 0x00,0x40,0x4c,0xdc,0x00,0x00,0x86,0x02 -# GFX940: scratch_load_sshort a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x4c,0xdc,0x00,0x00,0x86,0x02] +# GFX942: scratch_load_sshort a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x4c,0xdc,0x00,0x00,0x86,0x02] 0x10,0x40,0x4c,0xdc,0x00,0x00,0x86,0x02 -# GFX940: scratch_load_sshort a2, off, off ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0xff,0x02] +# GFX942: scratch_load_sshort a2, off, off ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0xff,0x02] 0x00,0x40,0x4c,0xdc,0x00,0x00,0xff,0x02 -# GFX940: scratch_load_sshort a2, off, off offset:16 ; encoding: [0x10,0x40,0x4c,0xdc,0x00,0x00,0xff,0x02] +# GFX942: scratch_load_sshort a2, off, off offset:16 ; encoding: [0x10,0x40,0x4c,0xdc,0x00,0x00,0xff,0x02] 0x10,0x40,0x4c,0xdc,0x00,0x00,0xff,0x02 -# GFX940: scratch_load_sshort v2, v4, s6 ; encoding: [0x00,0x60,0x4c,0xdc,0x04,0x00,0x06,0x02] +# GFX942: scratch_load_sshort v2, v4, s6 ; encoding: [0x00,0x60,0x4c,0xdc,0x04,0x00,0x06,0x02] 0x00,0x60,0x4c,0xdc,0x04,0x00,0x06,0x02 -# GFX940: scratch_load_sshort v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x4c,0xdc,0x04,0x00,0x06,0x02] +# GFX942: scratch_load_sshort v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x4c,0xdc,0x04,0x00,0x06,0x02] 0x10,0x60,0x4c,0xdc,0x04,0x00,0x06,0x02 -# GFX940: scratch_load_sshort v2, v4, off ; encoding: [0x00,0x60,0x4c,0xdc,0x04,0x00,0x7f,0x02] +# GFX942: scratch_load_sshort v2, v4, off ; encoding: [0x00,0x60,0x4c,0xdc,0x04,0x00,0x7f,0x02] 0x00,0x60,0x4c,0xdc,0x04,0x00,0x7f,0x02 -# GFX940: scratch_load_sshort v2, v4, off offset:16 ; encoding: [0x10,0x60,0x4c,0xdc,0x04,0x00,0x7f,0x02] +# GFX942: scratch_load_sshort v2, v4, off offset:16 ; encoding: [0x10,0x60,0x4c,0xdc,0x04,0x00,0x7f,0x02] 0x10,0x60,0x4c,0xdc,0x04,0x00,0x7f,0x02 -# GFX940: scratch_load_sshort v2, off, s6 ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x06,0x02] +# GFX942: scratch_load_sshort v2, off, s6 ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x06,0x02] 0x00,0x40,0x4c,0xdc,0x00,0x00,0x06,0x02 -# GFX940: scratch_load_sshort v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x4c,0xdc,0x00,0x00,0x06,0x02] +# GFX942: scratch_load_sshort v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x4c,0xdc,0x00,0x00,0x06,0x02] 0x10,0x40,0x4c,0xdc,0x00,0x00,0x06,0x02 -# GFX940: scratch_load_sshort v2, off, off ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x7f,0x02] +# GFX942: scratch_load_sshort v2, off, off ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x7f,0x02] 0x00,0x40,0x4c,0xdc,0x00,0x00,0x7f,0x02 -# GFX940: scratch_load_sshort v2, off, off offset:16 ; encoding: [0x10,0x40,0x4c,0xdc,0x00,0x00,0x7f,0x02] +# GFX942: scratch_load_sshort v2, off, off offset:16 ; encoding: [0x10,0x40,0x4c,0xdc,0x00,0x00,0x7f,0x02] 0x10,0x40,0x4c,0xdc,0x00,0x00,0x7f,0x02 -# GFX940: scratch_load_ubyte a2, v4, s6 ; encoding: [0x00,0x60,0x40,0xdc,0x04,0x00,0x86,0x02] +# GFX942: scratch_load_ubyte a2, v4, s6 ; encoding: [0x00,0x60,0x40,0xdc,0x04,0x00,0x86,0x02] 0x00,0x60,0x40,0xdc,0x04,0x00,0x86,0x02 -# GFX940: scratch_load_ubyte a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x40,0xdc,0x04,0x00,0x86,0x02] +# GFX942: scratch_load_ubyte a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x40,0xdc,0x04,0x00,0x86,0x02] 0x10,0x60,0x40,0xdc,0x04,0x00,0x86,0x02 -# GFX940: scratch_load_ubyte a2, v4, off ; encoding: [0x00,0x60,0x40,0xdc,0x04,0x00,0xff,0x02] +# GFX942: scratch_load_ubyte a2, v4, off ; encoding: [0x00,0x60,0x40,0xdc,0x04,0x00,0xff,0x02] 0x00,0x60,0x40,0xdc,0x04,0x00,0xff,0x02 -# GFX940: scratch_load_ubyte a2, v4, off offset:16 ; encoding: [0x10,0x60,0x40,0xdc,0x04,0x00,0xff,0x02] +# GFX942: scratch_load_ubyte a2, v4, off offset:16 ; encoding: [0x10,0x60,0x40,0xdc,0x04,0x00,0xff,0x02] 0x10,0x60,0x40,0xdc,0x04,0x00,0xff,0x02 -# GFX940: scratch_load_ubyte a2, off, s6 ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0x86,0x02] +# GFX942: scratch_load_ubyte a2, off, s6 ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0x86,0x02] 0x00,0x40,0x40,0xdc,0x00,0x00,0x86,0x02 -# GFX940: scratch_load_ubyte a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x40,0xdc,0x00,0x00,0x86,0x02] +# GFX942: scratch_load_ubyte a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x40,0xdc,0x00,0x00,0x86,0x02] 0x10,0x40,0x40,0xdc,0x00,0x00,0x86,0x02 -# GFX940: scratch_load_ubyte a2, off, off ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0xff,0x02] +# GFX942: scratch_load_ubyte a2, off, off ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0xff,0x02] 0x00,0x40,0x40,0xdc,0x00,0x00,0xff,0x02 -# GFX940: scratch_load_ubyte a2, off, off offset:16 ; encoding: [0x10,0x40,0x40,0xdc,0x00,0x00,0xff,0x02] +# GFX942: scratch_load_ubyte a2, off, off offset:16 ; encoding: [0x10,0x40,0x40,0xdc,0x00,0x00,0xff,0x02] 0x10,0x40,0x40,0xdc,0x00,0x00,0xff,0x02 -# GFX940: scratch_load_ubyte v2, v4, s6 ; encoding: [0x00,0x60,0x40,0xdc,0x04,0x00,0x06,0x02] +# GFX942: scratch_load_ubyte v2, v4, s6 ; encoding: [0x00,0x60,0x40,0xdc,0x04,0x00,0x06,0x02] 0x00,0x60,0x40,0xdc,0x04,0x00,0x06,0x02 -# GFX940: scratch_load_ubyte v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x40,0xdc,0x04,0x00,0x06,0x02] +# GFX942: scratch_load_ubyte v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x40,0xdc,0x04,0x00,0x06,0x02] 0x10,0x60,0x40,0xdc,0x04,0x00,0x06,0x02 -# GFX940: scratch_load_ubyte v2, v4, off ; encoding: [0x00,0x60,0x40,0xdc,0x04,0x00,0x7f,0x02] +# GFX942: scratch_load_ubyte v2, v4, off ; encoding: [0x00,0x60,0x40,0xdc,0x04,0x00,0x7f,0x02] 0x00,0x60,0x40,0xdc,0x04,0x00,0x7f,0x02 -# GFX940: scratch_load_ubyte v2, v4, off offset:16 ; encoding: [0x10,0x60,0x40,0xdc,0x04,0x00,0x7f,0x02] +# GFX942: scratch_load_ubyte v2, v4, off offset:16 ; encoding: [0x10,0x60,0x40,0xdc,0x04,0x00,0x7f,0x02] 0x10,0x60,0x40,0xdc,0x04,0x00,0x7f,0x02 -# GFX940: scratch_load_ubyte v2, off, s6 ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0x06,0x02] +# GFX942: scratch_load_ubyte v2, off, s6 ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0x06,0x02] 0x00,0x40,0x40,0xdc,0x00,0x00,0x06,0x02 -# GFX940: scratch_load_ubyte v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x40,0xdc,0x00,0x00,0x06,0x02] +# GFX942: scratch_load_ubyte v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x40,0xdc,0x00,0x00,0x06,0x02] 0x10,0x40,0x40,0xdc,0x00,0x00,0x06,0x02 -# GFX940: scratch_load_ubyte v2, off, off ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0x7f,0x02] +# GFX942: scratch_load_ubyte v2, off, off ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0x7f,0x02] 0x00,0x40,0x40,0xdc,0x00,0x00,0x7f,0x02 -# GFX940: scratch_load_ubyte v2, off, off offset:16 ; encoding: [0x10,0x40,0x40,0xdc,0x00,0x00,0x7f,0x02] +# GFX942: scratch_load_ubyte v2, off, off offset:16 ; encoding: [0x10,0x40,0x40,0xdc,0x00,0x00,0x7f,0x02] 0x10,0x40,0x40,0xdc,0x00,0x00,0x7f,0x02 -# GFX940: scratch_load_ubyte_d16 a2, v4, s6 ; encoding: [0x00,0x60,0x80,0xdc,0x04,0x00,0x86,0x02] +# GFX942: scratch_load_ubyte_d16 a2, v4, s6 ; encoding: [0x00,0x60,0x80,0xdc,0x04,0x00,0x86,0x02] 0x00,0x60,0x80,0xdc,0x04,0x00,0x86,0x02 -# GFX940: scratch_load_ubyte_d16 a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x80,0xdc,0x04,0x00,0x86,0x02] +# GFX942: scratch_load_ubyte_d16 a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x80,0xdc,0x04,0x00,0x86,0x02] 0x10,0x60,0x80,0xdc,0x04,0x00,0x86,0x02 -# GFX940: scratch_load_ubyte_d16 a2, v4, off ; encoding: [0x00,0x60,0x80,0xdc,0x04,0x00,0xff,0x02] +# GFX942: scratch_load_ubyte_d16 a2, v4, off ; encoding: [0x00,0x60,0x80,0xdc,0x04,0x00,0xff,0x02] 0x00,0x60,0x80,0xdc,0x04,0x00,0xff,0x02 -# GFX940: scratch_load_ubyte_d16 a2, v4, off offset:16 ; encoding: [0x10,0x60,0x80,0xdc,0x04,0x00,0xff,0x02] +# GFX942: scratch_load_ubyte_d16 a2, v4, off offset:16 ; encoding: [0x10,0x60,0x80,0xdc,0x04,0x00,0xff,0x02] 0x10,0x60,0x80,0xdc,0x04,0x00,0xff,0x02 -# GFX940: scratch_load_ubyte_d16 a2, off, s6 ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0x86,0x02] +# GFX942: scratch_load_ubyte_d16 a2, off, s6 ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0x86,0x02] 0x00,0x40,0x80,0xdc,0x00,0x00,0x86,0x02 -# GFX940: scratch_load_ubyte_d16 a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x80,0xdc,0x00,0x00,0x86,0x02] +# GFX942: scratch_load_ubyte_d16 a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x80,0xdc,0x00,0x00,0x86,0x02] 0x10,0x40,0x80,0xdc,0x00,0x00,0x86,0x02 -# GFX940: scratch_load_ubyte_d16 a2, off, off ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0xff,0x02] +# GFX942: scratch_load_ubyte_d16 a2, off, off ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0xff,0x02] 0x00,0x40,0x80,0xdc,0x00,0x00,0xff,0x02 -# GFX940: scratch_load_ubyte_d16 a2, off, off offset:16 ; encoding: [0x10,0x40,0x80,0xdc,0x00,0x00,0xff,0x02] +# GFX942: scratch_load_ubyte_d16 a2, off, off offset:16 ; encoding: [0x10,0x40,0x80,0xdc,0x00,0x00,0xff,0x02] 0x10,0x40,0x80,0xdc,0x00,0x00,0xff,0x02 -# GFX940: scratch_load_ubyte_d16 v2, v4, s6 ; encoding: [0x00,0x60,0x80,0xdc,0x04,0x00,0x06,0x02] +# GFX942: scratch_load_ubyte_d16 v2, v4, s6 ; encoding: [0x00,0x60,0x80,0xdc,0x04,0x00,0x06,0x02] 0x00,0x60,0x80,0xdc,0x04,0x00,0x06,0x02 -# GFX940: scratch_load_ubyte_d16 v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x80,0xdc,0x04,0x00,0x06,0x02] +# GFX942: scratch_load_ubyte_d16 v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x80,0xdc,0x04,0x00,0x06,0x02] 0x10,0x60,0x80,0xdc,0x04,0x00,0x06,0x02 -# GFX940: scratch_load_ubyte_d16 v2, v4, off ; encoding: [0x00,0x60,0x80,0xdc,0x04,0x00,0x7f,0x02] +# GFX942: scratch_load_ubyte_d16 v2, v4, off ; encoding: [0x00,0x60,0x80,0xdc,0x04,0x00,0x7f,0x02] 0x00,0x60,0x80,0xdc,0x04,0x00,0x7f,0x02 -# GFX940: scratch_load_ubyte_d16 v2, v4, off offset:16 ; encoding: [0x10,0x60,0x80,0xdc,0x04,0x00,0x7f,0x02] +# GFX942: scratch_load_ubyte_d16 v2, v4, off offset:16 ; encoding: [0x10,0x60,0x80,0xdc,0x04,0x00,0x7f,0x02] 0x10,0x60,0x80,0xdc,0x04,0x00,0x7f,0x02 -# GFX940: scratch_load_ubyte_d16 v2, off, s6 ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0x06,0x02] +# GFX942: scratch_load_ubyte_d16 v2, off, s6 ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0x06,0x02] 0x00,0x40,0x80,0xdc,0x00,0x00,0x06,0x02 -# GFX940: scratch_load_ubyte_d16 v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x80,0xdc,0x00,0x00,0x06,0x02] +# GFX942: scratch_load_ubyte_d16 v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x80,0xdc,0x00,0x00,0x06,0x02] 0x10,0x40,0x80,0xdc,0x00,0x00,0x06,0x02 -# GFX940: scratch_load_ubyte_d16 v2, off, off ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0x7f,0x02] +# GFX942: scratch_load_ubyte_d16 v2, off, off ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0x7f,0x02] 0x00,0x40,0x80,0xdc,0x00,0x00,0x7f,0x02 -# GFX940: scratch_load_ubyte_d16 v2, off, off offset:16 ; encoding: [0x10,0x40,0x80,0xdc,0x00,0x00,0x7f,0x02] +# GFX942: scratch_load_ubyte_d16 v2, off, off offset:16 ; encoding: [0x10,0x40,0x80,0xdc,0x00,0x00,0x7f,0x02] 0x10,0x40,0x80,0xdc,0x00,0x00,0x7f,0x02 -# GFX940: scratch_load_ubyte_d16_hi a2, v4, s6 ; encoding: [0x00,0x60,0x84,0xdc,0x04,0x00,0x86,0x02] +# GFX942: scratch_load_ubyte_d16_hi a2, v4, s6 ; encoding: [0x00,0x60,0x84,0xdc,0x04,0x00,0x86,0x02] 0x00,0x60,0x84,0xdc,0x04,0x00,0x86,0x02 -# GFX940: scratch_load_ubyte_d16_hi a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x84,0xdc,0x04,0x00,0x86,0x02] +# GFX942: scratch_load_ubyte_d16_hi a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x84,0xdc,0x04,0x00,0x86,0x02] 0x10,0x60,0x84,0xdc,0x04,0x00,0x86,0x02 -# GFX940: scratch_load_ubyte_d16_hi a2, v4, off ; encoding: [0x00,0x60,0x84,0xdc,0x04,0x00,0xff,0x02] +# GFX942: scratch_load_ubyte_d16_hi a2, v4, off ; encoding: [0x00,0x60,0x84,0xdc,0x04,0x00,0xff,0x02] 0x00,0x60,0x84,0xdc,0x04,0x00,0xff,0x02 -# GFX940: scratch_load_ubyte_d16_hi a2, v4, off offset:16 ; encoding: [0x10,0x60,0x84,0xdc,0x04,0x00,0xff,0x02] +# GFX942: scratch_load_ubyte_d16_hi a2, v4, off offset:16 ; encoding: [0x10,0x60,0x84,0xdc,0x04,0x00,0xff,0x02] 0x10,0x60,0x84,0xdc,0x04,0x00,0xff,0x02 -# GFX940: scratch_load_ubyte_d16_hi a2, off, s6 ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0x86,0x02] +# GFX942: scratch_load_ubyte_d16_hi a2, off, s6 ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0x86,0x02] 0x00,0x40,0x84,0xdc,0x00,0x00,0x86,0x02 -# GFX940: scratch_load_ubyte_d16_hi a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x84,0xdc,0x00,0x00,0x86,0x02] +# GFX942: scratch_load_ubyte_d16_hi a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x84,0xdc,0x00,0x00,0x86,0x02] 0x10,0x40,0x84,0xdc,0x00,0x00,0x86,0x02 -# GFX940: scratch_load_ubyte_d16_hi a2, off, off ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0xff,0x02] +# GFX942: scratch_load_ubyte_d16_hi a2, off, off ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0xff,0x02] 0x00,0x40,0x84,0xdc,0x00,0x00,0xff,0x02 -# GFX940: scratch_load_ubyte_d16_hi a2, off, off offset:16 ; encoding: [0x10,0x40,0x84,0xdc,0x00,0x00,0xff,0x02] +# GFX942: scratch_load_ubyte_d16_hi a2, off, off offset:16 ; encoding: [0x10,0x40,0x84,0xdc,0x00,0x00,0xff,0x02] 0x10,0x40,0x84,0xdc,0x00,0x00,0xff,0x02 -# GFX940: scratch_load_ubyte_d16_hi v2, v4, s6 ; encoding: [0x00,0x60,0x84,0xdc,0x04,0x00,0x06,0x02] +# GFX942: scratch_load_ubyte_d16_hi v2, v4, s6 ; encoding: [0x00,0x60,0x84,0xdc,0x04,0x00,0x06,0x02] 0x00,0x60,0x84,0xdc,0x04,0x00,0x06,0x02 -# GFX940: scratch_load_ubyte_d16_hi v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x84,0xdc,0x04,0x00,0x06,0x02] +# GFX942: scratch_load_ubyte_d16_hi v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x84,0xdc,0x04,0x00,0x06,0x02] 0x10,0x60,0x84,0xdc,0x04,0x00,0x06,0x02 -# GFX940: scratch_load_ubyte_d16_hi v2, v4, off ; encoding: [0x00,0x60,0x84,0xdc,0x04,0x00,0x7f,0x02] +# GFX942: scratch_load_ubyte_d16_hi v2, v4, off ; encoding: [0x00,0x60,0x84,0xdc,0x04,0x00,0x7f,0x02] 0x00,0x60,0x84,0xdc,0x04,0x00,0x7f,0x02 -# GFX940: scratch_load_ubyte_d16_hi v2, v4, off offset:16 ; encoding: [0x10,0x60,0x84,0xdc,0x04,0x00,0x7f,0x02] +# GFX942: scratch_load_ubyte_d16_hi v2, v4, off offset:16 ; encoding: [0x10,0x60,0x84,0xdc,0x04,0x00,0x7f,0x02] 0x10,0x60,0x84,0xdc,0x04,0x00,0x7f,0x02 -# GFX940: scratch_load_ubyte_d16_hi v2, off, s6 ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0x06,0x02] +# GFX942: scratch_load_ubyte_d16_hi v2, off, s6 ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0x06,0x02] 0x00,0x40,0x84,0xdc,0x00,0x00,0x06,0x02 -# GFX940: scratch_load_ubyte_d16_hi v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x84,0xdc,0x00,0x00,0x06,0x02] +# GFX942: scratch_load_ubyte_d16_hi v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x84,0xdc,0x00,0x00,0x06,0x02] 0x10,0x40,0x84,0xdc,0x00,0x00,0x06,0x02 -# GFX940: scratch_load_ubyte_d16_hi v2, off, off ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0x7f,0x02] +# GFX942: scratch_load_ubyte_d16_hi v2, off, off ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0x7f,0x02] 0x00,0x40,0x84,0xdc,0x00,0x00,0x7f,0x02 -# GFX940: scratch_load_ubyte_d16_hi v2, off, off offset:16 ; encoding: [0x10,0x40,0x84,0xdc,0x00,0x00,0x7f,0x02] +# GFX942: scratch_load_ubyte_d16_hi v2, off, off offset:16 ; encoding: [0x10,0x40,0x84,0xdc,0x00,0x00,0x7f,0x02] 0x10,0x40,0x84,0xdc,0x00,0x00,0x7f,0x02 -# GFX940: scratch_load_ushort a2, v4, s6 ; encoding: [0x00,0x60,0x48,0xdc,0x04,0x00,0x86,0x02] +# GFX942: scratch_load_ushort a2, v4, s6 ; encoding: [0x00,0x60,0x48,0xdc,0x04,0x00,0x86,0x02] 0x00,0x60,0x48,0xdc,0x04,0x00,0x86,0x02 -# GFX940: scratch_load_ushort a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x48,0xdc,0x04,0x00,0x86,0x02] +# GFX942: scratch_load_ushort a2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x48,0xdc,0x04,0x00,0x86,0x02] 0x10,0x60,0x48,0xdc,0x04,0x00,0x86,0x02 -# GFX940: scratch_load_ushort a2, v4, off ; encoding: [0x00,0x60,0x48,0xdc,0x04,0x00,0xff,0x02] +# GFX942: scratch_load_ushort a2, v4, off ; encoding: [0x00,0x60,0x48,0xdc,0x04,0x00,0xff,0x02] 0x00,0x60,0x48,0xdc,0x04,0x00,0xff,0x02 -# GFX940: scratch_load_ushort a2, v4, off offset:16 ; encoding: [0x10,0x60,0x48,0xdc,0x04,0x00,0xff,0x02] +# GFX942: scratch_load_ushort a2, v4, off offset:16 ; encoding: [0x10,0x60,0x48,0xdc,0x04,0x00,0xff,0x02] 0x10,0x60,0x48,0xdc,0x04,0x00,0xff,0x02 -# GFX940: scratch_load_ushort a2, off, s6 ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0x86,0x02] +# GFX942: scratch_load_ushort a2, off, s6 ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0x86,0x02] 0x00,0x40,0x48,0xdc,0x00,0x00,0x86,0x02 -# GFX940: scratch_load_ushort a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x48,0xdc,0x00,0x00,0x86,0x02] +# GFX942: scratch_load_ushort a2, off, s6 offset:16 ; encoding: [0x10,0x40,0x48,0xdc,0x00,0x00,0x86,0x02] 0x10,0x40,0x48,0xdc,0x00,0x00,0x86,0x02 -# GFX940: scratch_load_ushort a2, off, off ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0xff,0x02] +# GFX942: scratch_load_ushort a2, off, off ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0xff,0x02] 0x00,0x40,0x48,0xdc,0x00,0x00,0xff,0x02 -# GFX940: scratch_load_ushort a2, off, off offset:16 ; encoding: [0x10,0x40,0x48,0xdc,0x00,0x00,0xff,0x02] +# GFX942: scratch_load_ushort a2, off, off offset:16 ; encoding: [0x10,0x40,0x48,0xdc,0x00,0x00,0xff,0x02] 0x10,0x40,0x48,0xdc,0x00,0x00,0xff,0x02 -# GFX940: scratch_load_ushort v2, v4, s6 ; encoding: [0x00,0x60,0x48,0xdc,0x04,0x00,0x06,0x02] +# GFX942: scratch_load_ushort v2, v4, s6 ; encoding: [0x00,0x60,0x48,0xdc,0x04,0x00,0x06,0x02] 0x00,0x60,0x48,0xdc,0x04,0x00,0x06,0x02 -# GFX940: scratch_load_ushort v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x48,0xdc,0x04,0x00,0x06,0x02] +# GFX942: scratch_load_ushort v2, v4, s6 offset:16 ; encoding: [0x10,0x60,0x48,0xdc,0x04,0x00,0x06,0x02] 0x10,0x60,0x48,0xdc,0x04,0x00,0x06,0x02 -# GFX940: scratch_load_ushort v2, v4, off ; encoding: [0x00,0x60,0x48,0xdc,0x04,0x00,0x7f,0x02] +# GFX942: scratch_load_ushort v2, v4, off ; encoding: [0x00,0x60,0x48,0xdc,0x04,0x00,0x7f,0x02] 0x00,0x60,0x48,0xdc,0x04,0x00,0x7f,0x02 -# GFX940: scratch_load_ushort v2, v4, off offset:16 ; encoding: [0x10,0x60,0x48,0xdc,0x04,0x00,0x7f,0x02] +# GFX942: scratch_load_ushort v2, v4, off offset:16 ; encoding: [0x10,0x60,0x48,0xdc,0x04,0x00,0x7f,0x02] 0x10,0x60,0x48,0xdc,0x04,0x00,0x7f,0x02 -# GFX940: scratch_load_ushort v2, off, s6 ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0x06,0x02] +# GFX942: scratch_load_ushort v2, off, s6 ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0x06,0x02] 0x00,0x40,0x48,0xdc,0x00,0x00,0x06,0x02 -# GFX940: scratch_load_ushort v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x48,0xdc,0x00,0x00,0x06,0x02] +# GFX942: scratch_load_ushort v2, off, s6 offset:16 ; encoding: [0x10,0x40,0x48,0xdc,0x00,0x00,0x06,0x02] 0x10,0x40,0x48,0xdc,0x00,0x00,0x06,0x02 -# GFX940: scratch_load_ushort v2, off, off ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0x7f,0x02] +# GFX942: scratch_load_ushort v2, off, off ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0x7f,0x02] 0x00,0x40,0x48,0xdc,0x00,0x00,0x7f,0x02 -# GFX940: scratch_load_ushort v2, off, off offset:16 ; encoding: [0x10,0x40,0x48,0xdc,0x00,0x00,0x7f,0x02] +# GFX942: scratch_load_ushort v2, off, off offset:16 ; encoding: [0x10,0x40,0x48,0xdc,0x00,0x00,0x7f,0x02] 0x10,0x40,0x48,0xdc,0x00,0x00,0x7f,0x02 -# GFX940: scratch_store_byte v4, a2, s6 ; encoding: [0x00,0x60,0x60,0xdc,0x04,0x02,0x86,0x00] +# GFX942: scratch_store_byte v4, a2, s6 ; encoding: [0x00,0x60,0x60,0xdc,0x04,0x02,0x86,0x00] 0x00,0x60,0x60,0xdc,0x04,0x02,0x86,0x00 -# GFX940: scratch_store_byte v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x60,0xdc,0x04,0x02,0x86,0x00] +# GFX942: scratch_store_byte v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x60,0xdc,0x04,0x02,0x86,0x00] 0x10,0x60,0x60,0xdc,0x04,0x02,0x86,0x00 -# GFX940: scratch_store_byte v4, a2, off ; encoding: [0x00,0x60,0x60,0xdc,0x04,0x02,0xff,0x00] +# GFX942: scratch_store_byte v4, a2, off ; encoding: [0x00,0x60,0x60,0xdc,0x04,0x02,0xff,0x00] 0x00,0x60,0x60,0xdc,0x04,0x02,0xff,0x00 -# GFX940: scratch_store_byte v4, a2, off offset:16 ; encoding: [0x10,0x60,0x60,0xdc,0x04,0x02,0xff,0x00] +# GFX942: scratch_store_byte v4, a2, off offset:16 ; encoding: [0x10,0x60,0x60,0xdc,0x04,0x02,0xff,0x00] 0x10,0x60,0x60,0xdc,0x04,0x02,0xff,0x00 -# GFX940: scratch_store_byte off, a2, s6 ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0x86,0x00] +# GFX942: scratch_store_byte off, a2, s6 ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0x86,0x00] 0x00,0x40,0x60,0xdc,0x00,0x02,0x86,0x00 -# GFX940: scratch_store_byte off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x60,0xdc,0x00,0x02,0x86,0x00] +# GFX942: scratch_store_byte off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x60,0xdc,0x00,0x02,0x86,0x00] 0x10,0x40,0x60,0xdc,0x00,0x02,0x86,0x00 -# GFX940: scratch_store_byte off, a2, off ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0xff,0x00] +# GFX942: scratch_store_byte off, a2, off ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0xff,0x00] 0x00,0x40,0x60,0xdc,0x00,0x02,0xff,0x00 -# GFX940: scratch_store_byte off, a2, off offset:16 ; encoding: [0x10,0x40,0x60,0xdc,0x00,0x02,0xff,0x00] +# GFX942: scratch_store_byte off, a2, off offset:16 ; encoding: [0x10,0x40,0x60,0xdc,0x00,0x02,0xff,0x00] 0x10,0x40,0x60,0xdc,0x00,0x02,0xff,0x00 -# GFX940: scratch_store_byte v4, v2, s6 ; encoding: [0x00,0x60,0x60,0xdc,0x04,0x02,0x06,0x00] +# GFX942: scratch_store_byte v4, v2, s6 ; encoding: [0x00,0x60,0x60,0xdc,0x04,0x02,0x06,0x00] 0x00,0x60,0x60,0xdc,0x04,0x02,0x06,0x00 -# GFX940: scratch_store_byte v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x60,0xdc,0x04,0x02,0x06,0x00] +# GFX942: scratch_store_byte v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x60,0xdc,0x04,0x02,0x06,0x00] 0x10,0x60,0x60,0xdc,0x04,0x02,0x06,0x00 -# GFX940: scratch_store_byte v4, v2, off ; encoding: [0x00,0x60,0x60,0xdc,0x04,0x02,0x7f,0x00] +# GFX942: scratch_store_byte v4, v2, off ; encoding: [0x00,0x60,0x60,0xdc,0x04,0x02,0x7f,0x00] 0x00,0x60,0x60,0xdc,0x04,0x02,0x7f,0x00 -# GFX940: scratch_store_byte v4, v2, off offset:16 ; encoding: [0x10,0x60,0x60,0xdc,0x04,0x02,0x7f,0x00] +# GFX942: scratch_store_byte v4, v2, off offset:16 ; encoding: [0x10,0x60,0x60,0xdc,0x04,0x02,0x7f,0x00] 0x10,0x60,0x60,0xdc,0x04,0x02,0x7f,0x00 -# GFX940: scratch_store_byte off, v2, s6 ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0x06,0x00] +# GFX942: scratch_store_byte off, v2, s6 ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0x06,0x00] 0x00,0x40,0x60,0xdc,0x00,0x02,0x06,0x00 -# GFX940: scratch_store_byte off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x60,0xdc,0x00,0x02,0x06,0x00] +# GFX942: scratch_store_byte off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x60,0xdc,0x00,0x02,0x06,0x00] 0x10,0x40,0x60,0xdc,0x00,0x02,0x06,0x00 -# GFX940: scratch_store_byte off, v2, off ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0x7f,0x00] +# GFX942: scratch_store_byte off, v2, off ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0x7f,0x00] 0x00,0x40,0x60,0xdc,0x00,0x02,0x7f,0x00 -# GFX940: scratch_store_byte off, v2, off offset:16 ; encoding: [0x10,0x40,0x60,0xdc,0x00,0x02,0x7f,0x00] +# GFX942: scratch_store_byte off, v2, off offset:16 ; encoding: [0x10,0x40,0x60,0xdc,0x00,0x02,0x7f,0x00] 0x10,0x40,0x60,0xdc,0x00,0x02,0x7f,0x00 -# GFX940: scratch_store_byte_d16_hi v4, a2, s6 ; encoding: [0x00,0x60,0x64,0xdc,0x04,0x02,0x86,0x00] +# GFX942: scratch_store_byte_d16_hi v4, a2, s6 ; encoding: [0x00,0x60,0x64,0xdc,0x04,0x02,0x86,0x00] 0x00,0x60,0x64,0xdc,0x04,0x02,0x86,0x00 -# GFX940: scratch_store_byte_d16_hi v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x64,0xdc,0x04,0x02,0x86,0x00] +# GFX942: scratch_store_byte_d16_hi v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x64,0xdc,0x04,0x02,0x86,0x00] 0x10,0x60,0x64,0xdc,0x04,0x02,0x86,0x00 -# GFX940: scratch_store_byte_d16_hi v4, a2, off ; encoding: [0x00,0x60,0x64,0xdc,0x04,0x02,0xff,0x00] +# GFX942: scratch_store_byte_d16_hi v4, a2, off ; encoding: [0x00,0x60,0x64,0xdc,0x04,0x02,0xff,0x00] 0x00,0x60,0x64,0xdc,0x04,0x02,0xff,0x00 -# GFX940: scratch_store_byte_d16_hi v4, a2, off offset:16 ; encoding: [0x10,0x60,0x64,0xdc,0x04,0x02,0xff,0x00] +# GFX942: scratch_store_byte_d16_hi v4, a2, off offset:16 ; encoding: [0x10,0x60,0x64,0xdc,0x04,0x02,0xff,0x00] 0x10,0x60,0x64,0xdc,0x04,0x02,0xff,0x00 -# GFX940: scratch_store_byte_d16_hi off, a2, s6 ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x86,0x00] +# GFX942: scratch_store_byte_d16_hi off, a2, s6 ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x86,0x00] 0x00,0x40,0x64,0xdc,0x00,0x02,0x86,0x00 -# GFX940: scratch_store_byte_d16_hi off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x64,0xdc,0x00,0x02,0x86,0x00] +# GFX942: scratch_store_byte_d16_hi off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x64,0xdc,0x00,0x02,0x86,0x00] 0x10,0x40,0x64,0xdc,0x00,0x02,0x86,0x00 -# GFX940: scratch_store_byte_d16_hi off, a2, off ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0xff,0x00] +# GFX942: scratch_store_byte_d16_hi off, a2, off ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0xff,0x00] 0x00,0x40,0x64,0xdc,0x00,0x02,0xff,0x00 -# GFX940: scratch_store_byte_d16_hi off, a2, off offset:16 ; encoding: [0x10,0x40,0x64,0xdc,0x00,0x02,0xff,0x00] +# GFX942: scratch_store_byte_d16_hi off, a2, off offset:16 ; encoding: [0x10,0x40,0x64,0xdc,0x00,0x02,0xff,0x00] 0x10,0x40,0x64,0xdc,0x00,0x02,0xff,0x00 -# GFX940: scratch_store_byte_d16_hi v4, v2, s6 ; encoding: [0x00,0x60,0x64,0xdc,0x04,0x02,0x06,0x00] +# GFX942: scratch_store_byte_d16_hi v4, v2, s6 ; encoding: [0x00,0x60,0x64,0xdc,0x04,0x02,0x06,0x00] 0x00,0x60,0x64,0xdc,0x04,0x02,0x06,0x00 -# GFX940: scratch_store_byte_d16_hi v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x64,0xdc,0x04,0x02,0x06,0x00] +# GFX942: scratch_store_byte_d16_hi v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x64,0xdc,0x04,0x02,0x06,0x00] 0x10,0x60,0x64,0xdc,0x04,0x02,0x06,0x00 -# GFX940: scratch_store_byte_d16_hi v4, v2, off ; encoding: [0x00,0x60,0x64,0xdc,0x04,0x02,0x7f,0x00] +# GFX942: scratch_store_byte_d16_hi v4, v2, off ; encoding: [0x00,0x60,0x64,0xdc,0x04,0x02,0x7f,0x00] 0x00,0x60,0x64,0xdc,0x04,0x02,0x7f,0x00 -# GFX940: scratch_store_byte_d16_hi v4, v2, off offset:16 ; encoding: [0x10,0x60,0x64,0xdc,0x04,0x02,0x7f,0x00] +# GFX942: scratch_store_byte_d16_hi v4, v2, off offset:16 ; encoding: [0x10,0x60,0x64,0xdc,0x04,0x02,0x7f,0x00] 0x10,0x60,0x64,0xdc,0x04,0x02,0x7f,0x00 -# GFX940: scratch_store_byte_d16_hi off, v2, s6 ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x06,0x00] +# GFX942: scratch_store_byte_d16_hi off, v2, s6 ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x06,0x00] 0x00,0x40,0x64,0xdc,0x00,0x02,0x06,0x00 -# GFX940: scratch_store_byte_d16_hi off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x64,0xdc,0x00,0x02,0x06,0x00] +# GFX942: scratch_store_byte_d16_hi off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x64,0xdc,0x00,0x02,0x06,0x00] 0x10,0x40,0x64,0xdc,0x00,0x02,0x06,0x00 -# GFX940: scratch_store_byte_d16_hi off, v2, off ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x7f,0x00] +# GFX942: scratch_store_byte_d16_hi off, v2, off ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x7f,0x00] 0x00,0x40,0x64,0xdc,0x00,0x02,0x7f,0x00 -# GFX940: scratch_store_byte_d16_hi off, v2, off offset:16 ; encoding: [0x10,0x40,0x64,0xdc,0x00,0x02,0x7f,0x00] +# GFX942: scratch_store_byte_d16_hi off, v2, off offset:16 ; encoding: [0x10,0x40,0x64,0xdc,0x00,0x02,0x7f,0x00] 0x10,0x40,0x64,0xdc,0x00,0x02,0x7f,0x00 -# GFX940: scratch_store_dword v4, a2, s6 ; encoding: [0x00,0x60,0x70,0xdc,0x04,0x02,0x86,0x00] +# GFX942: scratch_store_dword v4, a2, s6 ; encoding: [0x00,0x60,0x70,0xdc,0x04,0x02,0x86,0x00] 0x00,0x60,0x70,0xdc,0x04,0x02,0x86,0x00 -# GFX940: scratch_store_dword v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x70,0xdc,0x04,0x02,0x86,0x00] +# GFX942: scratch_store_dword v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x70,0xdc,0x04,0x02,0x86,0x00] 0x10,0x60,0x70,0xdc,0x04,0x02,0x86,0x00 -# GFX940: scratch_store_dword v4, a2, off ; encoding: [0x00,0x60,0x70,0xdc,0x04,0x02,0xff,0x00] +# GFX942: scratch_store_dword v4, a2, off ; encoding: [0x00,0x60,0x70,0xdc,0x04,0x02,0xff,0x00] 0x00,0x60,0x70,0xdc,0x04,0x02,0xff,0x00 -# GFX940: scratch_store_dword v4, a2, off offset:16 ; encoding: [0x10,0x60,0x70,0xdc,0x04,0x02,0xff,0x00] +# GFX942: scratch_store_dword v4, a2, off offset:16 ; encoding: [0x10,0x60,0x70,0xdc,0x04,0x02,0xff,0x00] 0x10,0x60,0x70,0xdc,0x04,0x02,0xff,0x00 -# GFX940: scratch_store_dword off, a2, s6 ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x86,0x00] +# GFX942: scratch_store_dword off, a2, s6 ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x86,0x00] 0x00,0x40,0x70,0xdc,0x00,0x02,0x86,0x00 -# GFX940: scratch_store_dword off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x70,0xdc,0x00,0x02,0x86,0x00] +# GFX942: scratch_store_dword off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x70,0xdc,0x00,0x02,0x86,0x00] 0x10,0x40,0x70,0xdc,0x00,0x02,0x86,0x00 -# GFX940: scratch_store_dword off, a2, off ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0xff,0x00] +# GFX942: scratch_store_dword off, a2, off ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0xff,0x00] 0x00,0x40,0x70,0xdc,0x00,0x02,0xff,0x00 -# GFX940: scratch_store_dword off, a2, off offset:16 ; encoding: [0x10,0x40,0x70,0xdc,0x00,0x02,0xff,0x00] +# GFX942: scratch_store_dword off, a2, off offset:16 ; encoding: [0x10,0x40,0x70,0xdc,0x00,0x02,0xff,0x00] 0x10,0x40,0x70,0xdc,0x00,0x02,0xff,0x00 -# GFX940: scratch_store_dword v4, v2, s6 ; encoding: [0x00,0x60,0x70,0xdc,0x04,0x02,0x06,0x00] +# GFX942: scratch_store_dword v4, v2, s6 ; encoding: [0x00,0x60,0x70,0xdc,0x04,0x02,0x06,0x00] 0x00,0x60,0x70,0xdc,0x04,0x02,0x06,0x00 -# GFX940: scratch_store_dword v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x70,0xdc,0x04,0x02,0x06,0x00] +# GFX942: scratch_store_dword v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x70,0xdc,0x04,0x02,0x06,0x00] 0x10,0x60,0x70,0xdc,0x04,0x02,0x06,0x00 -# GFX940: scratch_store_dword v4, v2, off ; encoding: [0x00,0x60,0x70,0xdc,0x04,0x02,0x7f,0x00] +# GFX942: scratch_store_dword v4, v2, off ; encoding: [0x00,0x60,0x70,0xdc,0x04,0x02,0x7f,0x00] 0x00,0x60,0x70,0xdc,0x04,0x02,0x7f,0x00 -# GFX940: scratch_store_dword v4, v2, off offset:16 ; encoding: [0x10,0x60,0x70,0xdc,0x04,0x02,0x7f,0x00] +# GFX942: scratch_store_dword v4, v2, off offset:16 ; encoding: [0x10,0x60,0x70,0xdc,0x04,0x02,0x7f,0x00] 0x10,0x60,0x70,0xdc,0x04,0x02,0x7f,0x00 -# GFX940: scratch_store_dword off, v2, s6 ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x06,0x00] +# GFX942: scratch_store_dword off, v2, s6 ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x06,0x00] 0x00,0x40,0x70,0xdc,0x00,0x02,0x06,0x00 -# GFX940: scratch_store_dword off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x70,0xdc,0x00,0x02,0x06,0x00] +# GFX942: scratch_store_dword off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x70,0xdc,0x00,0x02,0x06,0x00] 0x10,0x40,0x70,0xdc,0x00,0x02,0x06,0x00 -# GFX940: scratch_store_dword off, v2, off ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x7f,0x00] +# GFX942: scratch_store_dword off, v2, off ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x7f,0x00] 0x00,0x40,0x70,0xdc,0x00,0x02,0x7f,0x00 -# GFX940: scratch_store_dword off, v2, off offset:16 ; encoding: [0x10,0x40,0x70,0xdc,0x00,0x02,0x7f,0x00] +# GFX942: scratch_store_dword off, v2, off offset:16 ; encoding: [0x10,0x40,0x70,0xdc,0x00,0x02,0x7f,0x00] 0x10,0x40,0x70,0xdc,0x00,0x02,0x7f,0x00 -# GFX940: scratch_store_dwordx2 v4, a[2:3], s6 ; encoding: [0x00,0x60,0x74,0xdc,0x04,0x02,0x86,0x00] +# GFX942: scratch_store_dwordx2 v4, a[2:3], s6 ; encoding: [0x00,0x60,0x74,0xdc,0x04,0x02,0x86,0x00] 0x00,0x60,0x74,0xdc,0x04,0x02,0x86,0x00 -# GFX940: scratch_store_dwordx2 v4, a[2:3], s6 offset:16 ; encoding: [0x10,0x60,0x74,0xdc,0x04,0x02,0x86,0x00] +# GFX942: scratch_store_dwordx2 v4, a[2:3], s6 offset:16 ; encoding: [0x10,0x60,0x74,0xdc,0x04,0x02,0x86,0x00] 0x10,0x60,0x74,0xdc,0x04,0x02,0x86,0x00 -# GFX940: scratch_store_dwordx2 v4, a[2:3], off ; encoding: [0x00,0x60,0x74,0xdc,0x04,0x02,0xff,0x00] +# GFX942: scratch_store_dwordx2 v4, a[2:3], off ; encoding: [0x00,0x60,0x74,0xdc,0x04,0x02,0xff,0x00] 0x00,0x60,0x74,0xdc,0x04,0x02,0xff,0x00 -# GFX940: scratch_store_dwordx2 v4, a[2:3], off offset:16 ; encoding: [0x10,0x60,0x74,0xdc,0x04,0x02,0xff,0x00] +# GFX942: scratch_store_dwordx2 v4, a[2:3], off offset:16 ; encoding: [0x10,0x60,0x74,0xdc,0x04,0x02,0xff,0x00] 0x10,0x60,0x74,0xdc,0x04,0x02,0xff,0x00 -# GFX940: scratch_store_dwordx2 off, a[2:3], s6 ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0x86,0x00] +# GFX942: scratch_store_dwordx2 off, a[2:3], s6 ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0x86,0x00] 0x00,0x40,0x74,0xdc,0x00,0x02,0x86,0x00 -# GFX940: scratch_store_dwordx2 off, a[2:3], s6 offset:16 ; encoding: [0x10,0x40,0x74,0xdc,0x00,0x02,0x86,0x00] +# GFX942: scratch_store_dwordx2 off, a[2:3], s6 offset:16 ; encoding: [0x10,0x40,0x74,0xdc,0x00,0x02,0x86,0x00] 0x10,0x40,0x74,0xdc,0x00,0x02,0x86,0x00 -# GFX940: scratch_store_dwordx2 off, a[2:3], off ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0xff,0x00] +# GFX942: scratch_store_dwordx2 off, a[2:3], off ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0xff,0x00] 0x00,0x40,0x74,0xdc,0x00,0x02,0xff,0x00 -# GFX940: scratch_store_dwordx2 off, a[2:3], off offset:16 ; encoding: [0x10,0x40,0x74,0xdc,0x00,0x02,0xff,0x00] +# GFX942: scratch_store_dwordx2 off, a[2:3], off offset:16 ; encoding: [0x10,0x40,0x74,0xdc,0x00,0x02,0xff,0x00] 0x10,0x40,0x74,0xdc,0x00,0x02,0xff,0x00 -# GFX940: scratch_store_dwordx2 v4, v[2:3], s6 ; encoding: [0x00,0x60,0x74,0xdc,0x04,0x02,0x06,0x00] +# GFX942: scratch_store_dwordx2 v4, v[2:3], s6 ; encoding: [0x00,0x60,0x74,0xdc,0x04,0x02,0x06,0x00] 0x00,0x60,0x74,0xdc,0x04,0x02,0x06,0x00 -# GFX940: scratch_store_dwordx2 v4, v[2:3], s6 offset:16 ; encoding: [0x10,0x60,0x74,0xdc,0x04,0x02,0x06,0x00] +# GFX942: scratch_store_dwordx2 v4, v[2:3], s6 offset:16 ; encoding: [0x10,0x60,0x74,0xdc,0x04,0x02,0x06,0x00] 0x10,0x60,0x74,0xdc,0x04,0x02,0x06,0x00 -# GFX940: scratch_store_dwordx2 v4, v[2:3], off ; encoding: [0x00,0x60,0x74,0xdc,0x04,0x02,0x7f,0x00] +# GFX942: scratch_store_dwordx2 v4, v[2:3], off ; encoding: [0x00,0x60,0x74,0xdc,0x04,0x02,0x7f,0x00] 0x00,0x60,0x74,0xdc,0x04,0x02,0x7f,0x00 -# GFX940: scratch_store_dwordx2 v4, v[2:3], off offset:16 ; encoding: [0x10,0x60,0x74,0xdc,0x04,0x02,0x7f,0x00] +# GFX942: scratch_store_dwordx2 v4, v[2:3], off offset:16 ; encoding: [0x10,0x60,0x74,0xdc,0x04,0x02,0x7f,0x00] 0x10,0x60,0x74,0xdc,0x04,0x02,0x7f,0x00 -# GFX940: scratch_store_dwordx2 off, v[2:3], s6 ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0x06,0x00] +# GFX942: scratch_store_dwordx2 off, v[2:3], s6 ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0x06,0x00] 0x00,0x40,0x74,0xdc,0x00,0x02,0x06,0x00 -# GFX940: scratch_store_dwordx2 off, v[2:3], s6 offset:16 ; encoding: [0x10,0x40,0x74,0xdc,0x00,0x02,0x06,0x00] +# GFX942: scratch_store_dwordx2 off, v[2:3], s6 offset:16 ; encoding: [0x10,0x40,0x74,0xdc,0x00,0x02,0x06,0x00] 0x10,0x40,0x74,0xdc,0x00,0x02,0x06,0x00 -# GFX940: scratch_store_dwordx2 off, v[2:3], off ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0x7f,0x00] +# GFX942: scratch_store_dwordx2 off, v[2:3], off ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0x7f,0x00] 0x00,0x40,0x74,0xdc,0x00,0x02,0x7f,0x00 -# GFX940: scratch_store_dwordx2 off, v[2:3], off offset:16 ; encoding: [0x10,0x40,0x74,0xdc,0x00,0x02,0x7f,0x00] +# GFX942: scratch_store_dwordx2 off, v[2:3], off offset:16 ; encoding: [0x10,0x40,0x74,0xdc,0x00,0x02,0x7f,0x00] 0x10,0x40,0x74,0xdc,0x00,0x02,0x7f,0x00 -# GFX940: scratch_store_dwordx3 v4, a[2:4], s6 ; encoding: [0x00,0x60,0x78,0xdc,0x04,0x02,0x86,0x00] +# GFX942: scratch_store_dwordx3 v4, a[2:4], s6 ; encoding: [0x00,0x60,0x78,0xdc,0x04,0x02,0x86,0x00] 0x00,0x60,0x78,0xdc,0x04,0x02,0x86,0x00 -# GFX940: scratch_store_dwordx3 v4, a[2:4], s6 offset:16 ; encoding: [0x10,0x60,0x78,0xdc,0x04,0x02,0x86,0x00] +# GFX942: scratch_store_dwordx3 v4, a[2:4], s6 offset:16 ; encoding: [0x10,0x60,0x78,0xdc,0x04,0x02,0x86,0x00] 0x10,0x60,0x78,0xdc,0x04,0x02,0x86,0x00 -# GFX940: scratch_store_dwordx3 v4, a[2:4], off ; encoding: [0x00,0x60,0x78,0xdc,0x04,0x02,0xff,0x00] +# GFX942: scratch_store_dwordx3 v4, a[2:4], off ; encoding: [0x00,0x60,0x78,0xdc,0x04,0x02,0xff,0x00] 0x00,0x60,0x78,0xdc,0x04,0x02,0xff,0x00 -# GFX940: scratch_store_dwordx3 v4, a[2:4], off offset:16 ; encoding: [0x10,0x60,0x78,0xdc,0x04,0x02,0xff,0x00] +# GFX942: scratch_store_dwordx3 v4, a[2:4], off offset:16 ; encoding: [0x10,0x60,0x78,0xdc,0x04,0x02,0xff,0x00] 0x10,0x60,0x78,0xdc,0x04,0x02,0xff,0x00 -# GFX940: scratch_store_dwordx3 off, a[2:4], s6 ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0x86,0x00] +# GFX942: scratch_store_dwordx3 off, a[2:4], s6 ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0x86,0x00] 0x00,0x40,0x78,0xdc,0x00,0x02,0x86,0x00 -# GFX940: scratch_store_dwordx3 off, a[2:4], s6 offset:16 ; encoding: [0x10,0x40,0x78,0xdc,0x00,0x02,0x86,0x00] +# GFX942: scratch_store_dwordx3 off, a[2:4], s6 offset:16 ; encoding: [0x10,0x40,0x78,0xdc,0x00,0x02,0x86,0x00] 0x10,0x40,0x78,0xdc,0x00,0x02,0x86,0x00 -# GFX940: scratch_store_dwordx3 off, a[2:4], off ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0xff,0x00] +# GFX942: scratch_store_dwordx3 off, a[2:4], off ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0xff,0x00] 0x00,0x40,0x78,0xdc,0x00,0x02,0xff,0x00 -# GFX940: scratch_store_dwordx3 off, a[2:4], off offset:16 ; encoding: [0x10,0x40,0x78,0xdc,0x00,0x02,0xff,0x00] +# GFX942: scratch_store_dwordx3 off, a[2:4], off offset:16 ; encoding: [0x10,0x40,0x78,0xdc,0x00,0x02,0xff,0x00] 0x10,0x40,0x78,0xdc,0x00,0x02,0xff,0x00 -# GFX940: scratch_store_dwordx3 v4, v[2:4], s6 ; encoding: [0x00,0x60,0x78,0xdc,0x04,0x02,0x06,0x00] +# GFX942: scratch_store_dwordx3 v4, v[2:4], s6 ; encoding: [0x00,0x60,0x78,0xdc,0x04,0x02,0x06,0x00] 0x00,0x60,0x78,0xdc,0x04,0x02,0x06,0x00 -# GFX940: scratch_store_dwordx3 v4, v[2:4], s6 offset:16 ; encoding: [0x10,0x60,0x78,0xdc,0x04,0x02,0x06,0x00] +# GFX942: scratch_store_dwordx3 v4, v[2:4], s6 offset:16 ; encoding: [0x10,0x60,0x78,0xdc,0x04,0x02,0x06,0x00] 0x10,0x60,0x78,0xdc,0x04,0x02,0x06,0x00 -# GFX940: scratch_store_dwordx3 v4, v[2:4], off ; encoding: [0x00,0x60,0x78,0xdc,0x04,0x02,0x7f,0x00] +# GFX942: scratch_store_dwordx3 v4, v[2:4], off ; encoding: [0x00,0x60,0x78,0xdc,0x04,0x02,0x7f,0x00] 0x00,0x60,0x78,0xdc,0x04,0x02,0x7f,0x00 -# GFX940: scratch_store_dwordx3 v4, v[2:4], off offset:16 ; encoding: [0x10,0x60,0x78,0xdc,0x04,0x02,0x7f,0x00] +# GFX942: scratch_store_dwordx3 v4, v[2:4], off offset:16 ; encoding: [0x10,0x60,0x78,0xdc,0x04,0x02,0x7f,0x00] 0x10,0x60,0x78,0xdc,0x04,0x02,0x7f,0x00 -# GFX940: scratch_store_dwordx3 off, v[2:4], s6 ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0x06,0x00] +# GFX942: scratch_store_dwordx3 off, v[2:4], s6 ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0x06,0x00] 0x00,0x40,0x78,0xdc,0x00,0x02,0x06,0x00 -# GFX940: scratch_store_dwordx3 off, v[2:4], s6 offset:16 ; encoding: [0x10,0x40,0x78,0xdc,0x00,0x02,0x06,0x00] +# GFX942: scratch_store_dwordx3 off, v[2:4], s6 offset:16 ; encoding: [0x10,0x40,0x78,0xdc,0x00,0x02,0x06,0x00] 0x10,0x40,0x78,0xdc,0x00,0x02,0x06,0x00 -# GFX940: scratch_store_dwordx3 off, v[2:4], off ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0x7f,0x00] +# GFX942: scratch_store_dwordx3 off, v[2:4], off ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0x7f,0x00] 0x00,0x40,0x78,0xdc,0x00,0x02,0x7f,0x00 -# GFX940: scratch_store_dwordx3 off, v[2:4], off offset:16 ; encoding: [0x10,0x40,0x78,0xdc,0x00,0x02,0x7f,0x00] +# GFX942: scratch_store_dwordx3 off, v[2:4], off offset:16 ; encoding: [0x10,0x40,0x78,0xdc,0x00,0x02,0x7f,0x00] 0x10,0x40,0x78,0xdc,0x00,0x02,0x7f,0x00 -# GFX940: scratch_store_dwordx4 v4, a[2:5], s6 ; encoding: [0x00,0x60,0x7c,0xdc,0x04,0x02,0x86,0x00] +# GFX942: scratch_store_dwordx4 v4, a[2:5], s6 ; encoding: [0x00,0x60,0x7c,0xdc,0x04,0x02,0x86,0x00] 0x00,0x60,0x7c,0xdc,0x04,0x02,0x86,0x00 -# GFX940: scratch_store_dwordx4 v4, a[2:5], s6 offset:16 ; encoding: [0x10,0x60,0x7c,0xdc,0x04,0x02,0x86,0x00] +# GFX942: scratch_store_dwordx4 v4, a[2:5], s6 offset:16 ; encoding: [0x10,0x60,0x7c,0xdc,0x04,0x02,0x86,0x00] 0x10,0x60,0x7c,0xdc,0x04,0x02,0x86,0x00 -# GFX940: scratch_store_dwordx4 v4, a[2:5], off ; encoding: [0x00,0x60,0x7c,0xdc,0x04,0x02,0xff,0x00] +# GFX942: scratch_store_dwordx4 v4, a[2:5], off ; encoding: [0x00,0x60,0x7c,0xdc,0x04,0x02,0xff,0x00] 0x00,0x60,0x7c,0xdc,0x04,0x02,0xff,0x00 -# GFX940: scratch_store_dwordx4 v4, a[2:5], off offset:16 ; encoding: [0x10,0x60,0x7c,0xdc,0x04,0x02,0xff,0x00] +# GFX942: scratch_store_dwordx4 v4, a[2:5], off offset:16 ; encoding: [0x10,0x60,0x7c,0xdc,0x04,0x02,0xff,0x00] 0x10,0x60,0x7c,0xdc,0x04,0x02,0xff,0x00 -# GFX940: scratch_store_dwordx4 off, a[2:5], s6 ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x86,0x00] +# GFX942: scratch_store_dwordx4 off, a[2:5], s6 ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x86,0x00] 0x00,0x40,0x7c,0xdc,0x00,0x02,0x86,0x00 -# GFX940: scratch_store_dwordx4 off, a[2:5], s6 offset:16 ; encoding: [0x10,0x40,0x7c,0xdc,0x00,0x02,0x86,0x00] +# GFX942: scratch_store_dwordx4 off, a[2:5], s6 offset:16 ; encoding: [0x10,0x40,0x7c,0xdc,0x00,0x02,0x86,0x00] 0x10,0x40,0x7c,0xdc,0x00,0x02,0x86,0x00 -# GFX940: scratch_store_dwordx4 off, a[2:5], off ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0xff,0x00] +# GFX942: scratch_store_dwordx4 off, a[2:5], off ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0xff,0x00] 0x00,0x40,0x7c,0xdc,0x00,0x02,0xff,0x00 -# GFX940: scratch_store_dwordx4 off, a[2:5], off offset:16 ; encoding: [0x10,0x40,0x7c,0xdc,0x00,0x02,0xff,0x00] +# GFX942: scratch_store_dwordx4 off, a[2:5], off offset:16 ; encoding: [0x10,0x40,0x7c,0xdc,0x00,0x02,0xff,0x00] 0x10,0x40,0x7c,0xdc,0x00,0x02,0xff,0x00 -# GFX940: scratch_store_dwordx4 v4, v[2:5], s6 ; encoding: [0x00,0x60,0x7c,0xdc,0x04,0x02,0x06,0x00] +# GFX942: scratch_store_dwordx4 v4, v[2:5], s6 ; encoding: [0x00,0x60,0x7c,0xdc,0x04,0x02,0x06,0x00] 0x00,0x60,0x7c,0xdc,0x04,0x02,0x06,0x00 -# GFX940: scratch_store_dwordx4 v4, v[2:5], s6 offset:16 ; encoding: [0x10,0x60,0x7c,0xdc,0x04,0x02,0x06,0x00] +# GFX942: scratch_store_dwordx4 v4, v[2:5], s6 offset:16 ; encoding: [0x10,0x60,0x7c,0xdc,0x04,0x02,0x06,0x00] 0x10,0x60,0x7c,0xdc,0x04,0x02,0x06,0x00 -# GFX940: scratch_store_dwordx4 v4, v[2:5], off ; encoding: [0x00,0x60,0x7c,0xdc,0x04,0x02,0x7f,0x00] +# GFX942: scratch_store_dwordx4 v4, v[2:5], off ; encoding: [0x00,0x60,0x7c,0xdc,0x04,0x02,0x7f,0x00] 0x00,0x60,0x7c,0xdc,0x04,0x02,0x7f,0x00 -# GFX940: scratch_store_dwordx4 v4, v[2:5], off offset:16 ; encoding: [0x10,0x60,0x7c,0xdc,0x04,0x02,0x7f,0x00] +# GFX942: scratch_store_dwordx4 v4, v[2:5], off offset:16 ; encoding: [0x10,0x60,0x7c,0xdc,0x04,0x02,0x7f,0x00] 0x10,0x60,0x7c,0xdc,0x04,0x02,0x7f,0x00 -# GFX940: scratch_store_dwordx4 off, v[2:5], s6 ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x06,0x00] +# GFX942: scratch_store_dwordx4 off, v[2:5], s6 ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x06,0x00] 0x00,0x40,0x7c,0xdc,0x00,0x02,0x06,0x00 -# GFX940: scratch_store_dwordx4 off, v[2:5], s6 offset:16 ; encoding: [0x10,0x40,0x7c,0xdc,0x00,0x02,0x06,0x00] +# GFX942: scratch_store_dwordx4 off, v[2:5], s6 offset:16 ; encoding: [0x10,0x40,0x7c,0xdc,0x00,0x02,0x06,0x00] 0x10,0x40,0x7c,0xdc,0x00,0x02,0x06,0x00 -# GFX940: scratch_store_dwordx4 off, v[2:5], off ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x7f,0x00] +# GFX942: scratch_store_dwordx4 off, v[2:5], off ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x7f,0x00] 0x00,0x40,0x7c,0xdc,0x00,0x02,0x7f,0x00 -# GFX940: scratch_store_dwordx4 off, v[2:5], off offset:16 ; encoding: [0x10,0x40,0x7c,0xdc,0x00,0x02,0x7f,0x00] +# GFX942: scratch_store_dwordx4 off, v[2:5], off offset:16 ; encoding: [0x10,0x40,0x7c,0xdc,0x00,0x02,0x7f,0x00] 0x10,0x40,0x7c,0xdc,0x00,0x02,0x7f,0x00 -# GFX940: scratch_store_short v4, a2, s6 ; encoding: [0x00,0x60,0x68,0xdc,0x04,0x02,0x86,0x00] +# GFX942: scratch_store_short v4, a2, s6 ; encoding: [0x00,0x60,0x68,0xdc,0x04,0x02,0x86,0x00] 0x00,0x60,0x68,0xdc,0x04,0x02,0x86,0x00 -# GFX940: scratch_store_short v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x68,0xdc,0x04,0x02,0x86,0x00] +# GFX942: scratch_store_short v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x68,0xdc,0x04,0x02,0x86,0x00] 0x10,0x60,0x68,0xdc,0x04,0x02,0x86,0x00 -# GFX940: scratch_store_short v4, a2, off ; encoding: [0x00,0x60,0x68,0xdc,0x04,0x02,0xff,0x00] +# GFX942: scratch_store_short v4, a2, off ; encoding: [0x00,0x60,0x68,0xdc,0x04,0x02,0xff,0x00] 0x00,0x60,0x68,0xdc,0x04,0x02,0xff,0x00 -# GFX940: scratch_store_short v4, a2, off offset:16 ; encoding: [0x10,0x60,0x68,0xdc,0x04,0x02,0xff,0x00] +# GFX942: scratch_store_short v4, a2, off offset:16 ; encoding: [0x10,0x60,0x68,0xdc,0x04,0x02,0xff,0x00] 0x10,0x60,0x68,0xdc,0x04,0x02,0xff,0x00 -# GFX940: scratch_store_short off, a2, s6 ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0x86,0x00] +# GFX942: scratch_store_short off, a2, s6 ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0x86,0x00] 0x00,0x40,0x68,0xdc,0x00,0x02,0x86,0x00 -# GFX940: scratch_store_short off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x68,0xdc,0x00,0x02,0x86,0x00] +# GFX942: scratch_store_short off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x68,0xdc,0x00,0x02,0x86,0x00] 0x10,0x40,0x68,0xdc,0x00,0x02,0x86,0x00 -# GFX940: scratch_store_short off, a2, off ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0xff,0x00] +# GFX942: scratch_store_short off, a2, off ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0xff,0x00] 0x00,0x40,0x68,0xdc,0x00,0x02,0xff,0x00 -# GFX940: scratch_store_short off, a2, off offset:16 ; encoding: [0x10,0x40,0x68,0xdc,0x00,0x02,0xff,0x00] +# GFX942: scratch_store_short off, a2, off offset:16 ; encoding: [0x10,0x40,0x68,0xdc,0x00,0x02,0xff,0x00] 0x10,0x40,0x68,0xdc,0x00,0x02,0xff,0x00 -# GFX940: scratch_store_short v4, v2, s6 ; encoding: [0x00,0x60,0x68,0xdc,0x04,0x02,0x06,0x00] +# GFX942: scratch_store_short v4, v2, s6 ; encoding: [0x00,0x60,0x68,0xdc,0x04,0x02,0x06,0x00] 0x00,0x60,0x68,0xdc,0x04,0x02,0x06,0x00 -# GFX940: scratch_store_short v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x68,0xdc,0x04,0x02,0x06,0x00] +# GFX942: scratch_store_short v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x68,0xdc,0x04,0x02,0x06,0x00] 0x10,0x60,0x68,0xdc,0x04,0x02,0x06,0x00 -# GFX940: scratch_store_short v4, v2, off ; encoding: [0x00,0x60,0x68,0xdc,0x04,0x02,0x7f,0x00] +# GFX942: scratch_store_short v4, v2, off ; encoding: [0x00,0x60,0x68,0xdc,0x04,0x02,0x7f,0x00] 0x00,0x60,0x68,0xdc,0x04,0x02,0x7f,0x00 -# GFX940: scratch_store_short v4, v2, off offset:16 ; encoding: [0x10,0x60,0x68,0xdc,0x04,0x02,0x7f,0x00] +# GFX942: scratch_store_short v4, v2, off offset:16 ; encoding: [0x10,0x60,0x68,0xdc,0x04,0x02,0x7f,0x00] 0x10,0x60,0x68,0xdc,0x04,0x02,0x7f,0x00 -# GFX940: scratch_store_short off, v2, s6 ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0x06,0x00] +# GFX942: scratch_store_short off, v2, s6 ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0x06,0x00] 0x00,0x40,0x68,0xdc,0x00,0x02,0x06,0x00 -# GFX940: scratch_store_short off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x68,0xdc,0x00,0x02,0x06,0x00] +# GFX942: scratch_store_short off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x68,0xdc,0x00,0x02,0x06,0x00] 0x10,0x40,0x68,0xdc,0x00,0x02,0x06,0x00 -# GFX940: scratch_store_short off, v2, off ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0x7f,0x00] +# GFX942: scratch_store_short off, v2, off ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0x7f,0x00] 0x00,0x40,0x68,0xdc,0x00,0x02,0x7f,0x00 -# GFX940: scratch_store_short off, v2, off offset:16 ; encoding: [0x10,0x40,0x68,0xdc,0x00,0x02,0x7f,0x00] +# GFX942: scratch_store_short off, v2, off offset:16 ; encoding: [0x10,0x40,0x68,0xdc,0x00,0x02,0x7f,0x00] 0x10,0x40,0x68,0xdc,0x00,0x02,0x7f,0x00 -# GFX940: scratch_store_short_d16_hi v4, a2, s6 ; encoding: [0x00,0x60,0x6c,0xdc,0x04,0x02,0x86,0x00] +# GFX942: scratch_store_short_d16_hi v4, a2, s6 ; encoding: [0x00,0x60,0x6c,0xdc,0x04,0x02,0x86,0x00] 0x00,0x60,0x6c,0xdc,0x04,0x02,0x86,0x00 -# GFX940: scratch_store_short_d16_hi v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x6c,0xdc,0x04,0x02,0x86,0x00] +# GFX942: scratch_store_short_d16_hi v4, a2, s6 offset:16 ; encoding: [0x10,0x60,0x6c,0xdc,0x04,0x02,0x86,0x00] 0x10,0x60,0x6c,0xdc,0x04,0x02,0x86,0x00 -# GFX940: scratch_store_short_d16_hi v4, a2, off ; encoding: [0x00,0x60,0x6c,0xdc,0x04,0x02,0xff,0x00] +# GFX942: scratch_store_short_d16_hi v4, a2, off ; encoding: [0x00,0x60,0x6c,0xdc,0x04,0x02,0xff,0x00] 0x00,0x60,0x6c,0xdc,0x04,0x02,0xff,0x00 -# GFX940: scratch_store_short_d16_hi v4, a2, off offset:16 ; encoding: [0x10,0x60,0x6c,0xdc,0x04,0x02,0xff,0x00] +# GFX942: scratch_store_short_d16_hi v4, a2, off offset:16 ; encoding: [0x10,0x60,0x6c,0xdc,0x04,0x02,0xff,0x00] 0x10,0x60,0x6c,0xdc,0x04,0x02,0xff,0x00 -# GFX940: scratch_store_short_d16_hi off, a2, s6 ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x86,0x00] +# GFX942: scratch_store_short_d16_hi off, a2, s6 ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x86,0x00] 0x00,0x40,0x6c,0xdc,0x00,0x02,0x86,0x00 -# GFX940: scratch_store_short_d16_hi off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x6c,0xdc,0x00,0x02,0x86,0x00] +# GFX942: scratch_store_short_d16_hi off, a2, s6 offset:16 ; encoding: [0x10,0x40,0x6c,0xdc,0x00,0x02,0x86,0x00] 0x10,0x40,0x6c,0xdc,0x00,0x02,0x86,0x00 -# GFX940: scratch_store_short_d16_hi off, a2, off ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0xff,0x00] +# GFX942: scratch_store_short_d16_hi off, a2, off ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0xff,0x00] 0x00,0x40,0x6c,0xdc,0x00,0x02,0xff,0x00 -# GFX940: scratch_store_short_d16_hi off, a2, off offset:16 ; encoding: [0x10,0x40,0x6c,0xdc,0x00,0x02,0xff,0x00] +# GFX942: scratch_store_short_d16_hi off, a2, off offset:16 ; encoding: [0x10,0x40,0x6c,0xdc,0x00,0x02,0xff,0x00] 0x10,0x40,0x6c,0xdc,0x00,0x02,0xff,0x00 -# GFX940: scratch_store_short_d16_hi v4, v2, s6 ; encoding: [0x00,0x60,0x6c,0xdc,0x04,0x02,0x06,0x00] +# GFX942: scratch_store_short_d16_hi v4, v2, s6 ; encoding: [0x00,0x60,0x6c,0xdc,0x04,0x02,0x06,0x00] 0x00,0x60,0x6c,0xdc,0x04,0x02,0x06,0x00 -# GFX940: scratch_store_short_d16_hi v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x6c,0xdc,0x04,0x02,0x06,0x00] +# GFX942: scratch_store_short_d16_hi v4, v2, s6 offset:16 ; encoding: [0x10,0x60,0x6c,0xdc,0x04,0x02,0x06,0x00] 0x10,0x60,0x6c,0xdc,0x04,0x02,0x06,0x00 -# GFX940: scratch_store_short_d16_hi v4, v2, off ; encoding: [0x00,0x60,0x6c,0xdc,0x04,0x02,0x7f,0x00] +# GFX942: scratch_store_short_d16_hi v4, v2, off ; encoding: [0x00,0x60,0x6c,0xdc,0x04,0x02,0x7f,0x00] 0x00,0x60,0x6c,0xdc,0x04,0x02,0x7f,0x00 -# GFX940: scratch_store_short_d16_hi v4, v2, off offset:16 ; encoding: [0x10,0x60,0x6c,0xdc,0x04,0x02,0x7f,0x00] +# GFX942: scratch_store_short_d16_hi v4, v2, off offset:16 ; encoding: [0x10,0x60,0x6c,0xdc,0x04,0x02,0x7f,0x00] 0x10,0x60,0x6c,0xdc,0x04,0x02,0x7f,0x00 -# GFX940: scratch_store_short_d16_hi off, v2, s6 ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x06,0x00] +# GFX942: scratch_store_short_d16_hi off, v2, s6 ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x06,0x00] 0x00,0x40,0x6c,0xdc,0x00,0x02,0x06,0x00 -# GFX940: scratch_store_short_d16_hi off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x6c,0xdc,0x00,0x02,0x06,0x00] +# GFX942: scratch_store_short_d16_hi off, v2, s6 offset:16 ; encoding: [0x10,0x40,0x6c,0xdc,0x00,0x02,0x06,0x00] 0x10,0x40,0x6c,0xdc,0x00,0x02,0x06,0x00 -# GFX940: scratch_store_short_d16_hi off, v2, off ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x7f,0x00] +# GFX942: scratch_store_short_d16_hi off, v2, off ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x7f,0x00] 0x00,0x40,0x6c,0xdc,0x00,0x02,0x7f,0x00 -# GFX940: scratch_store_short_d16_hi off, v2, off offset:16 ; encoding: [0x10,0x40,0x6c,0xdc,0x00,0x02,0x7f,0x00] +# GFX942: scratch_store_short_d16_hi off, v2, off offset:16 ; encoding: [0x10,0x40,0x6c,0xdc,0x00,0x02,0x7f,0x00] 0x10,0x40,0x6c,0xdc,0x00,0x02,0x7f,0x00 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx940_mai.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx942_mai.txt similarity index 61% rename from llvm/test/MC/Disassembler/AMDGPU/gfx940_mai.txt rename to llvm/test/MC/Disassembler/AMDGPU/gfx942_mai.txt index e6951bc5dc684..d72a91b481205 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx940_mai.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx942_mai.txt @@ -1,598 +1,598 @@ -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx940 -show-encoding -disassemble %s | FileCheck -check-prefix=GFX940 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx942 -show-encoding -disassemble %s | FileCheck -check-prefix=GFX942 %s -# GFX940: v_accvgpr_write_b32 a10, s20 ; encoding: [0x0a,0x40,0xd9,0xd3,0x14,0x00,0x00,0x18] +# GFX942: v_accvgpr_write_b32 a10, s20 ; encoding: [0x0a,0x40,0xd9,0xd3,0x14,0x00,0x00,0x18] 0x0a,0x40,0xd9,0xd3,0x14,0x00,0x00,0x18 -# GFX940: v_mfma_i32_32x32x16_i8 v[0:15], v[2:3], v[4:5], v[0:15] ; encoding: [0x00,0x00,0xd6,0xd3,0x02,0x09,0x02,0x04] +# GFX942: v_mfma_i32_32x32x16_i8 v[0:15], v[2:3], v[4:5], v[0:15] ; encoding: [0x00,0x00,0xd6,0xd3,0x02,0x09,0x02,0x04] 0x00,0x00,0xd6,0xd3,0x02,0x09,0x02,0x04 -# GFX940: v_mfma_i32_32x32x16_i8 a[0:15], v[2:3], v[4:5], a[0:15] ; encoding: [0x00,0x80,0xd6,0xd3,0x02,0x09,0x02,0x04] +# GFX942: v_mfma_i32_32x32x16_i8 a[0:15], v[2:3], v[4:5], a[0:15] ; encoding: [0x00,0x80,0xd6,0xd3,0x02,0x09,0x02,0x04] 0x00,0x80,0xd6,0xd3,0x02,0x09,0x02,0x04 -# GFX940: v_mfma_i32_32x32x16_i8 a[0:15], v[2:3], v[4:5], a[0:15] blgp:5 ; encoding: [0x00,0x80,0xd6,0xd3,0x02,0x09,0x02,0xa4] +# GFX942: v_mfma_i32_32x32x16_i8 a[0:15], v[2:3], v[4:5], a[0:15] blgp:5 ; encoding: [0x00,0x80,0xd6,0xd3,0x02,0x09,0x02,0xa4] 0x00,0x80,0xd6,0xd3,0x02,0x09,0x02,0xa4 -# GFX940: v_mfma_i32_16x16x32_i8 v[0:3], v[2:3], v[4:5], v[0:3] ; encoding: [0x00,0x00,0xd7,0xd3,0x02,0x09,0x02,0x04] +# GFX942: v_mfma_i32_16x16x32_i8 v[0:3], v[2:3], v[4:5], v[0:3] ; encoding: [0x00,0x00,0xd7,0xd3,0x02,0x09,0x02,0x04] 0x00,0x00,0xd7,0xd3,0x02,0x09,0x02,0x04 -# GFX940: v_mfma_i32_16x16x32_i8 a[0:3], v[2:3], v[4:5], a[0:3] ; encoding: [0x00,0x80,0xd7,0xd3,0x02,0x09,0x02,0x04] +# GFX942: v_mfma_i32_16x16x32_i8 a[0:3], v[2:3], v[4:5], a[0:3] ; encoding: [0x00,0x80,0xd7,0xd3,0x02,0x09,0x02,0x04] 0x00,0x80,0xd7,0xd3,0x02,0x09,0x02,0x04 -# GFX940: v_mfma_i32_16x16x32_i8 a[0:3], v[2:3], v[4:5], a[0:3] blgp:5 ; encoding: [0x00,0x80,0xd7,0xd3,0x02,0x09,0x02,0xa4] +# GFX942: v_mfma_i32_16x16x32_i8 a[0:3], v[2:3], v[4:5], a[0:3] blgp:5 ; encoding: [0x00,0x80,0xd7,0xd3,0x02,0x09,0x02,0xa4] 0x00,0x80,0xd7,0xd3,0x02,0x09,0x02,0xa4 -# GFX940: v_mfma_f32_32x32x4_2b_bf16 v[0:31], v[2:3], v[4:5], v[2:33] ; encoding: [0x00,0x00,0xdd,0xd3,0x02,0x09,0x0a,0x04] +# GFX942: v_mfma_f32_32x32x4_2b_bf16 v[0:31], v[2:3], v[4:5], v[2:33] ; encoding: [0x00,0x00,0xdd,0xd3,0x02,0x09,0x0a,0x04] 0x00,0x00,0xdd,0xd3,0x02,0x09,0x0a,0x04 -# GFX940: v_mfma_f32_32x32x4_2b_bf16 a[0:31], v[2:3], v[4:5], a[2:33] ; encoding: [0x00,0x80,0xdd,0xd3,0x02,0x09,0x0a,0x04] +# GFX942: v_mfma_f32_32x32x4_2b_bf16 a[0:31], v[2:3], v[4:5], a[2:33] ; encoding: [0x00,0x80,0xdd,0xd3,0x02,0x09,0x0a,0x04] 0x00,0x80,0xdd,0xd3,0x02,0x09,0x0a,0x04 -# GFX940: v_mfma_f32_16x16x4_4b_bf16 v[0:15], v[2:3], v[4:5], v[2:17] ; encoding: [0x00,0x00,0xde,0xd3,0x02,0x09,0x0a,0x04] +# GFX942: v_mfma_f32_16x16x4_4b_bf16 v[0:15], v[2:3], v[4:5], v[2:17] ; encoding: [0x00,0x00,0xde,0xd3,0x02,0x09,0x0a,0x04] 0x00,0x00,0xde,0xd3,0x02,0x09,0x0a,0x04 -# GFX940: v_mfma_f32_16x16x4_4b_bf16 a[0:15], v[2:3], v[4:5], a[2:17] ; encoding: [0x00,0x80,0xde,0xd3,0x02,0x09,0x0a,0x04] +# GFX942: v_mfma_f32_16x16x4_4b_bf16 a[0:15], v[2:3], v[4:5], a[2:17] ; encoding: [0x00,0x80,0xde,0xd3,0x02,0x09,0x0a,0x04] 0x00,0x80,0xde,0xd3,0x02,0x09,0x0a,0x04 -# GFX940: v_mfma_f32_4x4x4_16b_bf16 v[0:3], v[2:3], v[4:5], v[2:5] ; encoding: [0x00,0x00,0xdf,0xd3,0x02,0x09,0x0a,0x04] +# GFX942: v_mfma_f32_4x4x4_16b_bf16 v[0:3], v[2:3], v[4:5], v[2:5] ; encoding: [0x00,0x00,0xdf,0xd3,0x02,0x09,0x0a,0x04] 0x00,0x00,0xdf,0xd3,0x02,0x09,0x0a,0x04 -# GFX940: v_mfma_f32_4x4x4_16b_bf16 a[0:3], v[2:3], v[4:5], a[2:5] ; encoding: [0x00,0x80,0xdf,0xd3,0x02,0x09,0x0a,0x04] +# GFX942: v_mfma_f32_4x4x4_16b_bf16 a[0:3], v[2:3], v[4:5], a[2:5] ; encoding: [0x00,0x80,0xdf,0xd3,0x02,0x09,0x0a,0x04] 0x00,0x80,0xdf,0xd3,0x02,0x09,0x0a,0x04 -# GFX940: v_mfma_f32_32x32x8_bf16 v[0:15], v[2:3], v[4:5], v[2:17] ; encoding: [0x00,0x00,0xe0,0xd3,0x02,0x09,0x0a,0x04] +# GFX942: v_mfma_f32_32x32x8_bf16 v[0:15], v[2:3], v[4:5], v[2:17] ; encoding: [0x00,0x00,0xe0,0xd3,0x02,0x09,0x0a,0x04] 0x00,0x00,0xe0,0xd3,0x02,0x09,0x0a,0x04 -# GFX940: v_mfma_f32_32x32x8_bf16 a[0:15], v[2:3], v[4:5], a[2:17] ; encoding: [0x00,0x80,0xe0,0xd3,0x02,0x09,0x0a,0x04] +# GFX942: v_mfma_f32_32x32x8_bf16 a[0:15], v[2:3], v[4:5], a[2:17] ; encoding: [0x00,0x80,0xe0,0xd3,0x02,0x09,0x0a,0x04] 0x00,0x80,0xe0,0xd3,0x02,0x09,0x0a,0x04 -# GFX940: v_mfma_f32_16x16x16_bf16 v[0:3], v[2:3], v[4:5], v[2:5] ; encoding: [0x00,0x00,0xe1,0xd3,0x02,0x09,0x0a,0x04] +# GFX942: v_mfma_f32_16x16x16_bf16 v[0:3], v[2:3], v[4:5], v[2:5] ; encoding: [0x00,0x00,0xe1,0xd3,0x02,0x09,0x0a,0x04] 0x00,0x00,0xe1,0xd3,0x02,0x09,0x0a,0x04 -# GFX940: v_mfma_f32_16x16x16_bf16 a[0:3], v[2:3], v[4:5], a[2:5] ; encoding: [0x00,0x80,0xe1,0xd3,0x02,0x09,0x0a,0x04] +# GFX942: v_mfma_f32_16x16x16_bf16 a[0:3], v[2:3], v[4:5], a[2:5] ; encoding: [0x00,0x80,0xe1,0xd3,0x02,0x09,0x0a,0x04] 0x00,0x80,0xe1,0xd3,0x02,0x09,0x0a,0x04 -# GFX940: v_mfma_f32_16x16x8_xf32 a[0:3], v[2:3], v[4:5], a[2:5] ; encoding: [0x00,0x80,0xbe,0xd3,0x02,0x09,0x0a,0x04] +# GFX942: v_mfma_f32_16x16x8_xf32 a[0:3], v[2:3], v[4:5], a[2:5] ; encoding: [0x00,0x80,0xbe,0xd3,0x02,0x09,0x0a,0x04] 0x00,0x80,0xbe,0xd3,0x02,0x09,0x0a,0x04 -# GFX940: v_mfma_f32_16x16x8_xf32 v[0:3], v[2:3], v[4:5], v[2:5] ; encoding: [0x00,0x00,0xbe,0xd3,0x02,0x09,0x0a,0x04] +# GFX942: v_mfma_f32_16x16x8_xf32 v[0:3], v[2:3], v[4:5], v[2:5] ; encoding: [0x00,0x00,0xbe,0xd3,0x02,0x09,0x0a,0x04] 0x00,0x00,0xbe,0xd3,0x02,0x09,0x0a,0x04 -# GFX940: v_mfma_f32_32x32x4_xf32 v[0:15], v[2:3], v[4:5], v[2:17] ; encoding: [0x00,0x00,0xbf,0xd3,0x02,0x09,0x0a,0x04] +# GFX942: v_mfma_f32_32x32x4_xf32 v[0:15], v[2:3], v[4:5], v[2:17] ; encoding: [0x00,0x00,0xbf,0xd3,0x02,0x09,0x0a,0x04] 0x00,0x00,0xbf,0xd3,0x02,0x09,0x0a,0x04 -# GFX940: v_mfma_f32_32x32x4_xf32 a[0:15], v[2:3], v[4:5], a[2:17] ; encoding: [0x00,0x80,0xbf,0xd3,0x02,0x09,0x0a,0x04] +# GFX942: v_mfma_f32_32x32x4_xf32 a[0:15], v[2:3], v[4:5], a[2:17] ; encoding: [0x00,0x80,0xbf,0xd3,0x02,0x09,0x0a,0x04] 0x00,0x80,0xbf,0xd3,0x02,0x09,0x0a,0x04 -# GFX940: v_mfma_f32_16x16x32_bf8_bf8 v[0:3], v[2:3], v[4:5], v[0:3] ; encoding: [0x00,0x00,0xf0,0xd3,0x02,0x09,0x02,0x04] +# GFX942: v_mfma_f32_16x16x32_bf8_bf8 v[0:3], v[2:3], v[4:5], v[0:3] ; encoding: [0x00,0x00,0xf0,0xd3,0x02,0x09,0x02,0x04] 0x00,0x00,0xf0,0xd3,0x02,0x09,0x02,0x04 -# GFX940: v_mfma_f32_16x16x32_bf8_bf8 a[0:3], v[2:3], v[4:5], a[0:3] ; encoding: [0x00,0x80,0xf0,0xd3,0x02,0x09,0x02,0x04] +# GFX942: v_mfma_f32_16x16x32_bf8_bf8 a[0:3], v[2:3], v[4:5], a[0:3] ; encoding: [0x00,0x80,0xf0,0xd3,0x02,0x09,0x02,0x04] 0x00,0x80,0xf0,0xd3,0x02,0x09,0x02,0x04 -# GFX940: v_mfma_f32_16x16x32_bf8_bf8 a[0:3], v[2:3], v[4:5], a[0:3] blgp:5 ; encoding: [0x00,0x80,0xf0,0xd3,0x02,0x09,0x02,0xa4] +# GFX942: v_mfma_f32_16x16x32_bf8_bf8 a[0:3], v[2:3], v[4:5], a[0:3] blgp:5 ; encoding: [0x00,0x80,0xf0,0xd3,0x02,0x09,0x02,0xa4] 0x00,0x80,0xf0,0xd3,0x02,0x09,0x02,0xa4 -# GFX940: v_mfma_f32_16x16x32_bf8_fp8 v[0:3], v[2:3], v[4:5], v[0:3] ; encoding: [0x00,0x00,0xf1,0xd3,0x02,0x09,0x02,0x04] +# GFX942: v_mfma_f32_16x16x32_bf8_fp8 v[0:3], v[2:3], v[4:5], v[0:3] ; encoding: [0x00,0x00,0xf1,0xd3,0x02,0x09,0x02,0x04] 0x00,0x00,0xf1,0xd3,0x02,0x09,0x02,0x04 -# GFX940: v_mfma_f32_16x16x32_bf8_fp8 a[0:3], v[2:3], v[4:5], a[0:3] ; encoding: [0x00,0x80,0xf1,0xd3,0x02,0x09,0x02,0x04] +# GFX942: v_mfma_f32_16x16x32_bf8_fp8 a[0:3], v[2:3], v[4:5], a[0:3] ; encoding: [0x00,0x80,0xf1,0xd3,0x02,0x09,0x02,0x04] 0x00,0x80,0xf1,0xd3,0x02,0x09,0x02,0x04 -# GFX940: v_mfma_f32_16x16x32_bf8_fp8 a[0:3], v[2:3], v[4:5], a[0:3] blgp:5 ; encoding: [0x00,0x80,0xf1,0xd3,0x02,0x09,0x02,0xa4] +# GFX942: v_mfma_f32_16x16x32_bf8_fp8 a[0:3], v[2:3], v[4:5], a[0:3] blgp:5 ; encoding: [0x00,0x80,0xf1,0xd3,0x02,0x09,0x02,0xa4] 0x00,0x80,0xf1,0xd3,0x02,0x09,0x02,0xa4 -# GFX940: v_mfma_f32_16x16x32_fp8_bf8 v[0:3], v[2:3], v[4:5], v[0:3] ; encoding: [0x00,0x00,0xf2,0xd3,0x02,0x09,0x02,0x04] +# GFX942: v_mfma_f32_16x16x32_fp8_bf8 v[0:3], v[2:3], v[4:5], v[0:3] ; encoding: [0x00,0x00,0xf2,0xd3,0x02,0x09,0x02,0x04] 0x00,0x00,0xf2,0xd3,0x02,0x09,0x02,0x04 -# GFX940: v_mfma_f32_16x16x32_fp8_bf8 a[0:3], v[2:3], v[4:5], a[0:3] ; encoding: [0x00,0x80,0xf2,0xd3,0x02,0x09,0x02,0x04] +# GFX942: v_mfma_f32_16x16x32_fp8_bf8 a[0:3], v[2:3], v[4:5], a[0:3] ; encoding: [0x00,0x80,0xf2,0xd3,0x02,0x09,0x02,0x04] 0x00,0x80,0xf2,0xd3,0x02,0x09,0x02,0x04 -# GFX940: v_mfma_f32_16x16x32_fp8_bf8 a[0:3], v[2:3], v[4:5], a[0:3] blgp:5 ; encoding: [0x00,0x80,0xf2,0xd3,0x02,0x09,0x02,0xa4] +# GFX942: v_mfma_f32_16x16x32_fp8_bf8 a[0:3], v[2:3], v[4:5], a[0:3] blgp:5 ; encoding: [0x00,0x80,0xf2,0xd3,0x02,0x09,0x02,0xa4] 0x00,0x80,0xf2,0xd3,0x02,0x09,0x02,0xa4 -# GFX940: v_mfma_f32_16x16x32_fp8_fp8 v[0:3], v[2:3], v[4:5], v[0:3] ; encoding: [0x00,0x00,0xf3,0xd3,0x02,0x09,0x02,0x04] +# GFX942: v_mfma_f32_16x16x32_fp8_fp8 v[0:3], v[2:3], v[4:5], v[0:3] ; encoding: [0x00,0x00,0xf3,0xd3,0x02,0x09,0x02,0x04] 0x00,0x00,0xf3,0xd3,0x02,0x09,0x02,0x04 -# GFX940: v_mfma_f32_16x16x32_fp8_fp8 a[0:3], v[2:3], v[4:5], a[0:3] ; encoding: [0x00,0x80,0xf3,0xd3,0x02,0x09,0x02,0x04] +# GFX942: v_mfma_f32_16x16x32_fp8_fp8 a[0:3], v[2:3], v[4:5], a[0:3] ; encoding: [0x00,0x80,0xf3,0xd3,0x02,0x09,0x02,0x04] 0x00,0x80,0xf3,0xd3,0x02,0x09,0x02,0x04 -# GFX940: v_mfma_f32_16x16x32_fp8_fp8 a[0:3], v[2:3], v[4:5], a[0:3] blgp:5 ; encoding: [0x00,0x80,0xf3,0xd3,0x02,0x09,0x02,0xa4] +# GFX942: v_mfma_f32_16x16x32_fp8_fp8 a[0:3], v[2:3], v[4:5], a[0:3] blgp:5 ; encoding: [0x00,0x80,0xf3,0xd3,0x02,0x09,0x02,0xa4] 0x00,0x80,0xf3,0xd3,0x02,0x09,0x02,0xa4 -# GFX940: v_mfma_f32_32x32x16_bf8_bf8 v[0:15], v[2:3], v[4:5], v[0:15] ; encoding: [0x00,0x00,0xf4,0xd3,0x02,0x09,0x02,0x04] +# GFX942: v_mfma_f32_32x32x16_bf8_bf8 v[0:15], v[2:3], v[4:5], v[0:15] ; encoding: [0x00,0x00,0xf4,0xd3,0x02,0x09,0x02,0x04] 0x00,0x00,0xf4,0xd3,0x02,0x09,0x02,0x04 -# GFX940: v_mfma_f32_32x32x16_bf8_bf8 a[0:15], v[2:3], v[4:5], a[0:15] ; encoding: [0x00,0x80,0xf4,0xd3,0x02,0x09,0x02,0x04] +# GFX942: v_mfma_f32_32x32x16_bf8_bf8 a[0:15], v[2:3], v[4:5], a[0:15] ; encoding: [0x00,0x80,0xf4,0xd3,0x02,0x09,0x02,0x04] 0x00,0x80,0xf4,0xd3,0x02,0x09,0x02,0x04 -# GFX940: v_mfma_f32_32x32x16_bf8_bf8 a[0:15], v[2:3], v[4:5], a[0:15] blgp:5 ; encoding: [0x00,0x80,0xf4,0xd3,0x02,0x09,0x02,0xa4] +# GFX942: v_mfma_f32_32x32x16_bf8_bf8 a[0:15], v[2:3], v[4:5], a[0:15] blgp:5 ; encoding: [0x00,0x80,0xf4,0xd3,0x02,0x09,0x02,0xa4] 0x00,0x80,0xf4,0xd3,0x02,0x09,0x02,0xa4 -# GFX940: v_mfma_f32_32x32x16_bf8_fp8 v[0:15], v[2:3], v[4:5], v[0:15] ; encoding: [0x00,0x00,0xf5,0xd3,0x02,0x09,0x02,0x04] +# GFX942: v_mfma_f32_32x32x16_bf8_fp8 v[0:15], v[2:3], v[4:5], v[0:15] ; encoding: [0x00,0x00,0xf5,0xd3,0x02,0x09,0x02,0x04] 0x00,0x00,0xf5,0xd3,0x02,0x09,0x02,0x04 -# GFX940: v_mfma_f32_32x32x16_bf8_fp8 a[0:15], v[2:3], v[4:5], a[0:15] ; encoding: [0x00,0x80,0xf5,0xd3,0x02,0x09,0x02,0x04] +# GFX942: v_mfma_f32_32x32x16_bf8_fp8 a[0:15], v[2:3], v[4:5], a[0:15] ; encoding: [0x00,0x80,0xf5,0xd3,0x02,0x09,0x02,0x04] 0x00,0x80,0xf5,0xd3,0x02,0x09,0x02,0x04 -# GFX940: v_mfma_f32_32x32x16_bf8_fp8 a[0:15], v[2:3], v[4:5], a[0:15] blgp:5 ; encoding: [0x00,0x80,0xf5,0xd3,0x02,0x09,0x02,0xa4] +# GFX942: v_mfma_f32_32x32x16_bf8_fp8 a[0:15], v[2:3], v[4:5], a[0:15] blgp:5 ; encoding: [0x00,0x80,0xf5,0xd3,0x02,0x09,0x02,0xa4] 0x00,0x80,0xf5,0xd3,0x02,0x09,0x02,0xa4 -# GFX940: v_mfma_f32_32x32x16_fp8_bf8 v[0:15], v[2:3], v[4:5], v[0:15] ; encoding: [0x00,0x00,0xf6,0xd3,0x02,0x09,0x02,0x04] +# GFX942: v_mfma_f32_32x32x16_fp8_bf8 v[0:15], v[2:3], v[4:5], v[0:15] ; encoding: [0x00,0x00,0xf6,0xd3,0x02,0x09,0x02,0x04] 0x00,0x00,0xf6,0xd3,0x02,0x09,0x02,0x04 -# GFX940: v_mfma_f32_32x32x16_fp8_bf8 a[0:15], v[2:3], v[4:5], a[0:15] ; encoding: [0x00,0x80,0xf6,0xd3,0x02,0x09,0x02,0x04] +# GFX942: v_mfma_f32_32x32x16_fp8_bf8 a[0:15], v[2:3], v[4:5], a[0:15] ; encoding: [0x00,0x80,0xf6,0xd3,0x02,0x09,0x02,0x04] 0x00,0x80,0xf6,0xd3,0x02,0x09,0x02,0x04 -# GFX940: v_mfma_f32_32x32x16_fp8_bf8 a[0:15], v[2:3], v[4:5], a[0:15] blgp:5 ; encoding: [0x00,0x80,0xf6,0xd3,0x02,0x09,0x02,0xa4] +# GFX942: v_mfma_f32_32x32x16_fp8_bf8 a[0:15], v[2:3], v[4:5], a[0:15] blgp:5 ; encoding: [0x00,0x80,0xf6,0xd3,0x02,0x09,0x02,0xa4] 0x00,0x80,0xf6,0xd3,0x02,0x09,0x02,0xa4 -# GFX940: v_mfma_f32_32x32x16_fp8_fp8 v[0:15], v[2:3], v[4:5], v[0:15] ; encoding: [0x00,0x00,0xf7,0xd3,0x02,0x09,0x02,0x04] +# GFX942: v_mfma_f32_32x32x16_fp8_fp8 v[0:15], v[2:3], v[4:5], v[0:15] ; encoding: [0x00,0x00,0xf7,0xd3,0x02,0x09,0x02,0x04] 0x00,0x00,0xf7,0xd3,0x02,0x09,0x02,0x04 -# GFX940: v_mfma_f32_32x32x16_fp8_fp8 a[0:15], v[2:3], v[4:5], a[0:15] ; encoding: [0x00,0x80,0xf7,0xd3,0x02,0x09,0x02,0x04] +# GFX942: v_mfma_f32_32x32x16_fp8_fp8 a[0:15], v[2:3], v[4:5], a[0:15] ; encoding: [0x00,0x80,0xf7,0xd3,0x02,0x09,0x02,0x04] 0x00,0x80,0xf7,0xd3,0x02,0x09,0x02,0x04 -# GFX940: v_mfma_f32_32x32x16_fp8_fp8 a[0:15], v[2:3], v[4:5], a[0:15] blgp:5 ; encoding: [0x00,0x80,0xf7,0xd3,0x02,0x09,0x02,0xa4] +# GFX942: v_mfma_f32_32x32x16_fp8_fp8 a[0:15], v[2:3], v[4:5], a[0:15] blgp:5 ; encoding: [0x00,0x80,0xf7,0xd3,0x02,0x09,0x02,0xa4] 0x00,0x80,0xf7,0xd3,0x02,0x09,0x02,0xa4 -# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], a[2:3], v[4:7], v0 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xe2,0xd3,0x02,0x09,0x02,0x0c] +# GFX942: v_smfmac_f32_16x16x32_f16 v[10:13], a[2:3], v[4:7], v0 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xe2,0xd3,0x02,0x09,0x02,0x0c] 0x0a,0x0b,0xe2,0xd3,0x02,0x09,0x02,0x0c -# GFX940: v_smfmac_f32_16x16x32_f16 a[10:13], v[2:3], a[4:7], v1 ; encoding: [0x0a,0x80,0xe2,0xd3,0x02,0x09,0x06,0x14] +# GFX942: v_smfmac_f32_16x16x32_f16 a[10:13], v[2:3], a[4:7], v1 ; encoding: [0x0a,0x80,0xe2,0xd3,0x02,0x09,0x06,0x14] 0x0a,0x80,0xe2,0xd3,0x02,0x09,0x06,0x14 -# GFX940: v_smfmac_f32_16x16x32_f16 v[252:255], v[2:3], v[4:7], v3 ; encoding: [0xfc,0x00,0xe2,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_f32_16x16x32_f16 v[252:255], v[2:3], v[4:7], v3 ; encoding: [0xfc,0x00,0xe2,0xd3,0x02,0x09,0x0e,0x04] 0xfc,0x00,0xe2,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_f32_16x16x32_f16 a[252:255], v[2:3], v[4:7], v3 ; encoding: [0xfc,0x80,0xe2,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_f32_16x16x32_f16 a[252:255], v[2:3], v[4:7], v3 ; encoding: [0xfc,0x80,0xe2,0xd3,0x02,0x09,0x0e,0x04] 0xfc,0x80,0xe2,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe2,0xd3,0xfe,0x09,0x0e,0x04] +# GFX942: v_smfmac_f32_16x16x32_f16 v[10:13], v[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe2,0xd3,0xfe,0x09,0x0e,0x04] 0x0a,0x00,0xe2,0xd3,0xfe,0x09,0x0e,0x04 -# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], a[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe2,0xd3,0xfe,0x09,0x0e,0x0c] +# GFX942: v_smfmac_f32_16x16x32_f16 v[10:13], a[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe2,0xd3,0xfe,0x09,0x0e,0x0c] 0x0a,0x00,0xe2,0xd3,0xfe,0x09,0x0e,0x0c -# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[252:255], v3 ; encoding: [0x0a,0x00,0xe2,0xd3,0x02,0xf9,0x0f,0x04] +# GFX942: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[252:255], v3 ; encoding: [0x0a,0x00,0xe2,0xd3,0x02,0xf9,0x0f,0x04] 0x0a,0x00,0xe2,0xd3,0x02,0xf9,0x0f,0x04 -# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], a[252:255], v3 ; encoding: [0x0a,0x00,0xe2,0xd3,0x02,0xf9,0x0f,0x14] +# GFX942: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], a[252:255], v3 ; encoding: [0x0a,0x00,0xe2,0xd3,0x02,0xf9,0x0f,0x14] 0x0a,0x00,0xe2,0xd3,0x02,0xf9,0x0f,0x14 -# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[4:7], v255 ; encoding: [0x0a,0x00,0xe2,0xd3,0x02,0x09,0xfe,0x07] +# GFX942: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[4:7], v255 ; encoding: [0x0a,0x00,0xe2,0xd3,0x02,0x09,0xfe,0x07] 0x0a,0x00,0xe2,0xd3,0x02,0x09,0xfe,0x07 -# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[4:7], v3 cbsz:2 ; encoding: [0x0a,0x02,0xe2,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[4:7], v3 cbsz:2 ; encoding: [0x0a,0x02,0xe2,0xd3,0x02,0x09,0x0e,0x04] 0x0a,0x02,0xe2,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[4:7], v3 cbsz:7 ; encoding: [0x0a,0x07,0xe2,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[4:7], v3 cbsz:7 ; encoding: [0x0a,0x07,0xe2,0xd3,0x02,0x09,0x0e,0x04] 0x0a,0x07,0xe2,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[4:7], v3 abid:1 ; encoding: [0x0a,0x08,0xe2,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[4:7], v3 abid:1 ; encoding: [0x0a,0x08,0xe2,0xd3,0x02,0x09,0x0e,0x04] 0x0a,0x08,0xe2,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[4:7], v3 abid:7 ; encoding: [0x0a,0x38,0xe2,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[4:7], v3 abid:7 ; encoding: [0x0a,0x38,0xe2,0xd3,0x02,0x09,0x0e,0x04] 0x0a,0x38,0xe2,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[4:7], v3 abid:15 ; encoding: [0x0a,0x78,0xe2,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[4:7], v3 abid:15 ; encoding: [0x0a,0x78,0xe2,0xd3,0x02,0x09,0x0e,0x04] 0x0a,0x78,0xe2,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], a[2:3], v[4:7], v2 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xe4,0xd3,0x02,0x09,0x0a,0x0c] +# GFX942: v_smfmac_f32_32x32x16_f16 v[10:25], a[2:3], v[4:7], v2 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xe4,0xd3,0x02,0x09,0x0a,0x0c] 0x0a,0x0b,0xe4,0xd3,0x02,0x09,0x0a,0x0c -# GFX940: v_smfmac_f32_32x32x16_f16 a[10:25], v[2:3], a[4:7], v3 ; encoding: [0x0a,0x80,0xe4,0xd3,0x02,0x09,0x0e,0x14] +# GFX942: v_smfmac_f32_32x32x16_f16 a[10:25], v[2:3], a[4:7], v3 ; encoding: [0x0a,0x80,0xe4,0xd3,0x02,0x09,0x0e,0x14] 0x0a,0x80,0xe4,0xd3,0x02,0x09,0x0e,0x14 -# GFX940: v_smfmac_f32_32x32x16_f16 v[240:255], v[2:3], v[4:7], v3 ; encoding: [0xf0,0x00,0xe4,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_f32_32x32x16_f16 v[240:255], v[2:3], v[4:7], v3 ; encoding: [0xf0,0x00,0xe4,0xd3,0x02,0x09,0x0e,0x04] 0xf0,0x00,0xe4,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_f32_32x32x16_f16 a[240:255], v[2:3], v[4:7], v3 ; encoding: [0xf0,0x80,0xe4,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_f32_32x32x16_f16 a[240:255], v[2:3], v[4:7], v3 ; encoding: [0xf0,0x80,0xe4,0xd3,0x02,0x09,0x0e,0x04] 0xf0,0x80,0xe4,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe4,0xd3,0xfe,0x09,0x0e,0x04] +# GFX942: v_smfmac_f32_32x32x16_f16 v[10:25], v[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe4,0xd3,0xfe,0x09,0x0e,0x04] 0x0a,0x00,0xe4,0xd3,0xfe,0x09,0x0e,0x04 -# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], a[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe4,0xd3,0xfe,0x09,0x0e,0x0c] +# GFX942: v_smfmac_f32_32x32x16_f16 v[10:25], a[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe4,0xd3,0xfe,0x09,0x0e,0x0c] 0x0a,0x00,0xe4,0xd3,0xfe,0x09,0x0e,0x0c -# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[252:255], v3 ; encoding: [0x0a,0x00,0xe4,0xd3,0x02,0xf9,0x0f,0x04] +# GFX942: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[252:255], v3 ; encoding: [0x0a,0x00,0xe4,0xd3,0x02,0xf9,0x0f,0x04] 0x0a,0x00,0xe4,0xd3,0x02,0xf9,0x0f,0x04 -# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], a[252:255], v3 ; encoding: [0x0a,0x00,0xe4,0xd3,0x02,0xf9,0x0f,0x14] +# GFX942: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], a[252:255], v3 ; encoding: [0x0a,0x00,0xe4,0xd3,0x02,0xf9,0x0f,0x14] 0x0a,0x00,0xe4,0xd3,0x02,0xf9,0x0f,0x14 -# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[4:7], v255 ; encoding: [0x0a,0x00,0xe4,0xd3,0x02,0x09,0xfe,0x07] +# GFX942: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[4:7], v255 ; encoding: [0x0a,0x00,0xe4,0xd3,0x02,0x09,0xfe,0x07] 0x0a,0x00,0xe4,0xd3,0x02,0x09,0xfe,0x07 -# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[4:7], v3 cbsz:2 ; encoding: [0x0a,0x02,0xe4,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[4:7], v3 cbsz:2 ; encoding: [0x0a,0x02,0xe4,0xd3,0x02,0x09,0x0e,0x04] 0x0a,0x02,0xe4,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[4:7], v3 cbsz:7 ; encoding: [0x0a,0x07,0xe4,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[4:7], v3 cbsz:7 ; encoding: [0x0a,0x07,0xe4,0xd3,0x02,0x09,0x0e,0x04] 0x0a,0x07,0xe4,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[4:7], v3 abid:1 ; encoding: [0x0a,0x08,0xe4,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[4:7], v3 abid:1 ; encoding: [0x0a,0x08,0xe4,0xd3,0x02,0x09,0x0e,0x04] 0x0a,0x08,0xe4,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[4:7], v3 abid:7 ; encoding: [0x0a,0x38,0xe4,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[4:7], v3 abid:7 ; encoding: [0x0a,0x38,0xe4,0xd3,0x02,0x09,0x0e,0x04] 0x0a,0x38,0xe4,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[4:7], v3 abid:15 ; encoding: [0x0a,0x78,0xe4,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[4:7], v3 abid:15 ; encoding: [0x0a,0x78,0xe4,0xd3,0x02,0x09,0x0e,0x04] 0x0a,0x78,0xe4,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], a[2:3], v[4:7], v4 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xe6,0xd3,0x02,0x09,0x12,0x0c] +# GFX942: v_smfmac_f32_16x16x32_bf16 v[10:13], a[2:3], v[4:7], v4 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xe6,0xd3,0x02,0x09,0x12,0x0c] 0x0a,0x0b,0xe6,0xd3,0x02,0x09,0x12,0x0c -# GFX940: v_smfmac_f32_16x16x32_bf16 a[10:13], v[2:3], a[4:7], v5 ; encoding: [0x0a,0x80,0xe6,0xd3,0x02,0x09,0x16,0x14] +# GFX942: v_smfmac_f32_16x16x32_bf16 a[10:13], v[2:3], a[4:7], v5 ; encoding: [0x0a,0x80,0xe6,0xd3,0x02,0x09,0x16,0x14] 0x0a,0x80,0xe6,0xd3,0x02,0x09,0x16,0x14 -# GFX940: v_smfmac_f32_16x16x32_bf16 v[252:255], v[2:3], v[4:7], v3 ; encoding: [0xfc,0x00,0xe6,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_f32_16x16x32_bf16 v[252:255], v[2:3], v[4:7], v3 ; encoding: [0xfc,0x00,0xe6,0xd3,0x02,0x09,0x0e,0x04] 0xfc,0x00,0xe6,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_f32_16x16x32_bf16 a[252:255], v[2:3], v[4:7], v3 ; encoding: [0xfc,0x80,0xe6,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_f32_16x16x32_bf16 a[252:255], v[2:3], v[4:7], v3 ; encoding: [0xfc,0x80,0xe6,0xd3,0x02,0x09,0x0e,0x04] 0xfc,0x80,0xe6,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe6,0xd3,0xfe,0x09,0x0e,0x04] +# GFX942: v_smfmac_f32_16x16x32_bf16 v[10:13], v[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe6,0xd3,0xfe,0x09,0x0e,0x04] 0x0a,0x00,0xe6,0xd3,0xfe,0x09,0x0e,0x04 -# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], a[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe6,0xd3,0xfe,0x09,0x0e,0x0c] +# GFX942: v_smfmac_f32_16x16x32_bf16 v[10:13], a[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe6,0xd3,0xfe,0x09,0x0e,0x0c] 0x0a,0x00,0xe6,0xd3,0xfe,0x09,0x0e,0x0c -# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[252:255], v3 ; encoding: [0x0a,0x00,0xe6,0xd3,0x02,0xf9,0x0f,0x04] +# GFX942: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[252:255], v3 ; encoding: [0x0a,0x00,0xe6,0xd3,0x02,0xf9,0x0f,0x04] 0x0a,0x00,0xe6,0xd3,0x02,0xf9,0x0f,0x04 -# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], a[252:255], v3 ; encoding: [0x0a,0x00,0xe6,0xd3,0x02,0xf9,0x0f,0x14] +# GFX942: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], a[252:255], v3 ; encoding: [0x0a,0x00,0xe6,0xd3,0x02,0xf9,0x0f,0x14] 0x0a,0x00,0xe6,0xd3,0x02,0xf9,0x0f,0x14 -# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[4:7], v255 ; encoding: [0x0a,0x00,0xe6,0xd3,0x02,0x09,0xfe,0x07] +# GFX942: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[4:7], v255 ; encoding: [0x0a,0x00,0xe6,0xd3,0x02,0x09,0xfe,0x07] 0x0a,0x00,0xe6,0xd3,0x02,0x09,0xfe,0x07 -# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[4:7], v3 cbsz:2 ; encoding: [0x0a,0x02,0xe6,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[4:7], v3 cbsz:2 ; encoding: [0x0a,0x02,0xe6,0xd3,0x02,0x09,0x0e,0x04] 0x0a,0x02,0xe6,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[4:7], v3 cbsz:7 ; encoding: [0x0a,0x07,0xe6,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[4:7], v3 cbsz:7 ; encoding: [0x0a,0x07,0xe6,0xd3,0x02,0x09,0x0e,0x04] 0x0a,0x07,0xe6,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[4:7], v3 abid:1 ; encoding: [0x0a,0x08,0xe6,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[4:7], v3 abid:1 ; encoding: [0x0a,0x08,0xe6,0xd3,0x02,0x09,0x0e,0x04] 0x0a,0x08,0xe6,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[4:7], v3 abid:7 ; encoding: [0x0a,0x38,0xe6,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[4:7], v3 abid:7 ; encoding: [0x0a,0x38,0xe6,0xd3,0x02,0x09,0x0e,0x04] 0x0a,0x38,0xe6,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[4:7], v3 abid:15 ; encoding: [0x0a,0x78,0xe6,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[4:7], v3 abid:15 ; encoding: [0x0a,0x78,0xe6,0xd3,0x02,0x09,0x0e,0x04] 0x0a,0x78,0xe6,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], a[2:3], v[4:7], v6 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xe8,0xd3,0x02,0x09,0x1a,0x0c] +# GFX942: v_smfmac_f32_32x32x16_bf16 v[10:25], a[2:3], v[4:7], v6 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xe8,0xd3,0x02,0x09,0x1a,0x0c] 0x0a,0x0b,0xe8,0xd3,0x02,0x09,0x1a,0x0c -# GFX940: v_smfmac_f32_32x32x16_bf16 a[10:25], v[2:3], a[4:7], v7 ; encoding: [0x0a,0x80,0xe8,0xd3,0x02,0x09,0x1e,0x14] +# GFX942: v_smfmac_f32_32x32x16_bf16 a[10:25], v[2:3], a[4:7], v7 ; encoding: [0x0a,0x80,0xe8,0xd3,0x02,0x09,0x1e,0x14] 0x0a,0x80,0xe8,0xd3,0x02,0x09,0x1e,0x14 -# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], a[2:3], v[4:7], v8 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xe8,0xd3,0x02,0x09,0x22,0x0c] +# GFX942: v_smfmac_f32_32x32x16_bf16 v[10:25], a[2:3], v[4:7], v8 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xe8,0xd3,0x02,0x09,0x22,0x0c] 0x0a,0x0b,0xe8,0xd3,0x02,0x09,0x22,0x0c -# GFX940: v_smfmac_f32_32x32x16_bf16 a[10:25], v[2:3], a[4:7], v9 ; encoding: [0x0a,0x80,0xe8,0xd3,0x02,0x09,0x26,0x14] +# GFX942: v_smfmac_f32_32x32x16_bf16 a[10:25], v[2:3], a[4:7], v9 ; encoding: [0x0a,0x80,0xe8,0xd3,0x02,0x09,0x26,0x14] 0x0a,0x80,0xe8,0xd3,0x02,0x09,0x26,0x14 -# GFX940: v_smfmac_f32_32x32x16_bf16 v[240:255], v[2:3], v[4:7], v3 ; encoding: [0xf0,0x00,0xe8,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_f32_32x32x16_bf16 v[240:255], v[2:3], v[4:7], v3 ; encoding: [0xf0,0x00,0xe8,0xd3,0x02,0x09,0x0e,0x04] 0xf0,0x00,0xe8,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_f32_32x32x16_bf16 a[240:255], v[2:3], v[4:7], v3 ; encoding: [0xf0,0x80,0xe8,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_f32_32x32x16_bf16 a[240:255], v[2:3], v[4:7], v3 ; encoding: [0xf0,0x80,0xe8,0xd3,0x02,0x09,0x0e,0x04] 0xf0,0x80,0xe8,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe8,0xd3,0xfe,0x09,0x0e,0x04] +# GFX942: v_smfmac_f32_32x32x16_bf16 v[10:25], v[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe8,0xd3,0xfe,0x09,0x0e,0x04] 0x0a,0x00,0xe8,0xd3,0xfe,0x09,0x0e,0x04 -# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], a[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe8,0xd3,0xfe,0x09,0x0e,0x0c] +# GFX942: v_smfmac_f32_32x32x16_bf16 v[10:25], a[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe8,0xd3,0xfe,0x09,0x0e,0x0c] 0x0a,0x00,0xe8,0xd3,0xfe,0x09,0x0e,0x0c -# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[252:255], v3 ; encoding: [0x0a,0x00,0xe8,0xd3,0x02,0xf9,0x0f,0x04] +# GFX942: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[252:255], v3 ; encoding: [0x0a,0x00,0xe8,0xd3,0x02,0xf9,0x0f,0x04] 0x0a,0x00,0xe8,0xd3,0x02,0xf9,0x0f,0x04 -# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], a[252:255], v3 ; encoding: [0x0a,0x00,0xe8,0xd3,0x02,0xf9,0x0f,0x14] +# GFX942: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], a[252:255], v3 ; encoding: [0x0a,0x00,0xe8,0xd3,0x02,0xf9,0x0f,0x14] 0x0a,0x00,0xe8,0xd3,0x02,0xf9,0x0f,0x14 -# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[4:7], v255 ; encoding: [0x0a,0x00,0xe8,0xd3,0x02,0x09,0xfe,0x07] +# GFX942: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[4:7], v255 ; encoding: [0x0a,0x00,0xe8,0xd3,0x02,0x09,0xfe,0x07] 0x0a,0x00,0xe8,0xd3,0x02,0x09,0xfe,0x07 -# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[4:7], v3 cbsz:2 ; encoding: [0x0a,0x02,0xe8,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[4:7], v3 cbsz:2 ; encoding: [0x0a,0x02,0xe8,0xd3,0x02,0x09,0x0e,0x04] 0x0a,0x02,0xe8,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[4:7], v3 cbsz:7 ; encoding: [0x0a,0x07,0xe8,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[4:7], v3 cbsz:7 ; encoding: [0x0a,0x07,0xe8,0xd3,0x02,0x09,0x0e,0x04] 0x0a,0x07,0xe8,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[4:7], v3 abid:1 ; encoding: [0x0a,0x08,0xe8,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[4:7], v3 abid:1 ; encoding: [0x0a,0x08,0xe8,0xd3,0x02,0x09,0x0e,0x04] 0x0a,0x08,0xe8,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[4:7], v3 abid:7 ; encoding: [0x0a,0x38,0xe8,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[4:7], v3 abid:7 ; encoding: [0x0a,0x38,0xe8,0xd3,0x02,0x09,0x0e,0x04] 0x0a,0x38,0xe8,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[4:7], v3 abid:15 ; encoding: [0x0a,0x78,0xe8,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[4:7], v3 abid:15 ; encoding: [0x0a,0x78,0xe8,0xd3,0x02,0x09,0x0e,0x04] 0x0a,0x78,0xe8,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], a[2:3], v[4:7], v10 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xea,0xd3,0x02,0x09,0x2a,0x0c] +# GFX942: v_smfmac_i32_16x16x64_i8 v[10:13], a[2:3], v[4:7], v10 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xea,0xd3,0x02,0x09,0x2a,0x0c] 0x0a,0x0b,0xea,0xd3,0x02,0x09,0x2a,0x0c -# GFX940: v_smfmac_i32_16x16x64_i8 a[10:13], v[2:3], a[4:7], v11 ; encoding: [0x0a,0x80,0xea,0xd3,0x02,0x09,0x2e,0x14] +# GFX942: v_smfmac_i32_16x16x64_i8 a[10:13], v[2:3], a[4:7], v11 ; encoding: [0x0a,0x80,0xea,0xd3,0x02,0x09,0x2e,0x14] 0x0a,0x80,0xea,0xd3,0x02,0x09,0x2e,0x14 -# GFX940: v_smfmac_i32_16x16x64_i8 v[252:255], v[2:3], v[4:7], v3 ; encoding: [0xfc,0x00,0xea,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_i32_16x16x64_i8 v[252:255], v[2:3], v[4:7], v3 ; encoding: [0xfc,0x00,0xea,0xd3,0x02,0x09,0x0e,0x04] 0xfc,0x00,0xea,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_i32_16x16x64_i8 a[252:255], v[2:3], v[4:7], v3 ; encoding: [0xfc,0x80,0xea,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_i32_16x16x64_i8 a[252:255], v[2:3], v[4:7], v3 ; encoding: [0xfc,0x80,0xea,0xd3,0x02,0x09,0x0e,0x04] 0xfc,0x80,0xea,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xea,0xd3,0xfe,0x09,0x0e,0x04] +# GFX942: v_smfmac_i32_16x16x64_i8 v[10:13], v[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xea,0xd3,0xfe,0x09,0x0e,0x04] 0x0a,0x00,0xea,0xd3,0xfe,0x09,0x0e,0x04 -# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], a[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xea,0xd3,0xfe,0x09,0x0e,0x0c] +# GFX942: v_smfmac_i32_16x16x64_i8 v[10:13], a[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xea,0xd3,0xfe,0x09,0x0e,0x0c] 0x0a,0x00,0xea,0xd3,0xfe,0x09,0x0e,0x0c -# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[252:255], v3 ; encoding: [0x0a,0x00,0xea,0xd3,0x02,0xf9,0x0f,0x04] +# GFX942: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[252:255], v3 ; encoding: [0x0a,0x00,0xea,0xd3,0x02,0xf9,0x0f,0x04] 0x0a,0x00,0xea,0xd3,0x02,0xf9,0x0f,0x04 -# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], a[252:255], v3 ; encoding: [0x0a,0x00,0xea,0xd3,0x02,0xf9,0x0f,0x14] +# GFX942: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], a[252:255], v3 ; encoding: [0x0a,0x00,0xea,0xd3,0x02,0xf9,0x0f,0x14] 0x0a,0x00,0xea,0xd3,0x02,0xf9,0x0f,0x14 -# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[4:7], v255 ; encoding: [0x0a,0x00,0xea,0xd3,0x02,0x09,0xfe,0x07] +# GFX942: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[4:7], v255 ; encoding: [0x0a,0x00,0xea,0xd3,0x02,0x09,0xfe,0x07] 0x0a,0x00,0xea,0xd3,0x02,0x09,0xfe,0x07 -# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[4:7], v3 cbsz:2 ; encoding: [0x0a,0x02,0xea,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[4:7], v3 cbsz:2 ; encoding: [0x0a,0x02,0xea,0xd3,0x02,0x09,0x0e,0x04] 0x0a,0x02,0xea,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[4:7], v3 cbsz:7 ; encoding: [0x0a,0x07,0xea,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[4:7], v3 cbsz:7 ; encoding: [0x0a,0x07,0xea,0xd3,0x02,0x09,0x0e,0x04] 0x0a,0x07,0xea,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[4:7], v3 abid:1 ; encoding: [0x0a,0x08,0xea,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[4:7], v3 abid:1 ; encoding: [0x0a,0x08,0xea,0xd3,0x02,0x09,0x0e,0x04] 0x0a,0x08,0xea,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[4:7], v3 abid:7 ; encoding: [0x0a,0x38,0xea,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[4:7], v3 abid:7 ; encoding: [0x0a,0x38,0xea,0xd3,0x02,0x09,0x0e,0x04] 0x0a,0x38,0xea,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[4:7], v3 abid:15 ; encoding: [0x0a,0x78,0xea,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[4:7], v3 abid:15 ; encoding: [0x0a,0x78,0xea,0xd3,0x02,0x09,0x0e,0x04] 0x0a,0x78,0xea,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], a[2:3], v[4:7], v12 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xec,0xd3,0x02,0x09,0x32,0x0c] +# GFX942: v_smfmac_i32_32x32x32_i8 v[10:25], a[2:3], v[4:7], v12 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xec,0xd3,0x02,0x09,0x32,0x0c] 0x0a,0x0b,0xec,0xd3,0x02,0x09,0x32,0x0c -# GFX940: v_smfmac_i32_32x32x32_i8 a[10:25], v[2:3], a[4:7], v13 ; encoding: [0x0a,0x80,0xec,0xd3,0x02,0x09,0x36,0x14] +# GFX942: v_smfmac_i32_32x32x32_i8 a[10:25], v[2:3], a[4:7], v13 ; encoding: [0x0a,0x80,0xec,0xd3,0x02,0x09,0x36,0x14] 0x0a,0x80,0xec,0xd3,0x02,0x09,0x36,0x14 -# GFX940: v_smfmac_i32_32x32x32_i8 v[240:255], v[2:3], v[4:7], v3 ; encoding: [0xf0,0x00,0xec,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_i32_32x32x32_i8 v[240:255], v[2:3], v[4:7], v3 ; encoding: [0xf0,0x00,0xec,0xd3,0x02,0x09,0x0e,0x04] 0xf0,0x00,0xec,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_i32_32x32x32_i8 a[240:255], v[2:3], v[4:7], v3 ; encoding: [0xf0,0x80,0xec,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_i32_32x32x32_i8 a[240:255], v[2:3], v[4:7], v3 ; encoding: [0xf0,0x80,0xec,0xd3,0x02,0x09,0x0e,0x04] 0xf0,0x80,0xec,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xec,0xd3,0xfe,0x09,0x0e,0x04] +# GFX942: v_smfmac_i32_32x32x32_i8 v[10:25], v[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xec,0xd3,0xfe,0x09,0x0e,0x04] 0x0a,0x00,0xec,0xd3,0xfe,0x09,0x0e,0x04 -# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], a[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xec,0xd3,0xfe,0x09,0x0e,0x0c] +# GFX942: v_smfmac_i32_32x32x32_i8 v[10:25], a[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xec,0xd3,0xfe,0x09,0x0e,0x0c] 0x0a,0x00,0xec,0xd3,0xfe,0x09,0x0e,0x0c -# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[252:255], v3 ; encoding: [0x0a,0x00,0xec,0xd3,0x02,0xf9,0x0f,0x04] +# GFX942: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[252:255], v3 ; encoding: [0x0a,0x00,0xec,0xd3,0x02,0xf9,0x0f,0x04] 0x0a,0x00,0xec,0xd3,0x02,0xf9,0x0f,0x04 -# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], a[252:255], v3 ; encoding: [0x0a,0x00,0xec,0xd3,0x02,0xf9,0x0f,0x14] +# GFX942: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], a[252:255], v3 ; encoding: [0x0a,0x00,0xec,0xd3,0x02,0xf9,0x0f,0x14] 0x0a,0x00,0xec,0xd3,0x02,0xf9,0x0f,0x14 -# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[4:7], v255 ; encoding: [0x0a,0x00,0xec,0xd3,0x02,0x09,0xfe,0x07] +# GFX942: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[4:7], v255 ; encoding: [0x0a,0x00,0xec,0xd3,0x02,0x09,0xfe,0x07] 0x0a,0x00,0xec,0xd3,0x02,0x09,0xfe,0x07 -# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[4:7], v3 cbsz:2 ; encoding: [0x0a,0x02,0xec,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[4:7], v3 cbsz:2 ; encoding: [0x0a,0x02,0xec,0xd3,0x02,0x09,0x0e,0x04] 0x0a,0x02,0xec,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[4:7], v3 cbsz:7 ; encoding: [0x0a,0x07,0xec,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[4:7], v3 cbsz:7 ; encoding: [0x0a,0x07,0xec,0xd3,0x02,0x09,0x0e,0x04] 0x0a,0x07,0xec,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[4:7], v3 abid:1 ; encoding: [0x0a,0x08,0xec,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[4:7], v3 abid:1 ; encoding: [0x0a,0x08,0xec,0xd3,0x02,0x09,0x0e,0x04] 0x0a,0x08,0xec,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[4:7], v3 abid:7 ; encoding: [0x0a,0x38,0xec,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[4:7], v3 abid:7 ; encoding: [0x0a,0x38,0xec,0xd3,0x02,0x09,0x0e,0x04] 0x0a,0x38,0xec,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[4:7], v3 abid:15 ; encoding: [0x0a,0x78,0xec,0xd3,0x02,0x09,0x0e,0x04] +# GFX942: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[4:7], v3 abid:15 ; encoding: [0x0a,0x78,0xec,0xd3,0x02,0x09,0x0e,0x04] 0x0a,0x78,0xec,0xd3,0x02,0x09,0x0e,0x04 -# GFX940: v_smfmac_f32_16x16x64_bf8_bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xf8,0xd3,0x02,0x09,0x06,0x0c] +# GFX942: v_smfmac_f32_16x16x64_bf8_bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xf8,0xd3,0x02,0x09,0x06,0x0c] 0x00,0x0b,0xf8,0xd3,0x02,0x09,0x06,0x0c -# GFX940: v_smfmac_f32_16x16x64_bf8_bf8 a[0:3], v[2:3], a[4:7], v1 ; encoding: [0x00,0x80,0xf8,0xd3,0x02,0x09,0x06,0x14] +# GFX942: v_smfmac_f32_16x16x64_bf8_bf8 a[0:3], v[2:3], a[4:7], v1 ; encoding: [0x00,0x80,0xf8,0xd3,0x02,0x09,0x06,0x14] 0x00,0x80,0xf8,0xd3,0x02,0x09,0x06,0x14 -# GFX940: v_smfmac_f32_16x16x64_bf8_fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xf9,0xd3,0x02,0x09,0x06,0x0c] +# GFX942: v_smfmac_f32_16x16x64_bf8_fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xf9,0xd3,0x02,0x09,0x06,0x0c] 0x00,0x0b,0xf9,0xd3,0x02,0x09,0x06,0x0c -# GFX940: v_smfmac_f32_16x16x64_bf8_fp8 a[0:3], v[2:3], a[4:7], v1 ; encoding: [0x00,0x80,0xf9,0xd3,0x02,0x09,0x06,0x14] +# GFX942: v_smfmac_f32_16x16x64_bf8_fp8 a[0:3], v[2:3], a[4:7], v1 ; encoding: [0x00,0x80,0xf9,0xd3,0x02,0x09,0x06,0x14] 0x00,0x80,0xf9,0xd3,0x02,0x09,0x06,0x14 -# GFX940: v_smfmac_f32_16x16x64_fp8_bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xfa,0xd3,0x02,0x09,0x06,0x0c] +# GFX942: v_smfmac_f32_16x16x64_fp8_bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xfa,0xd3,0x02,0x09,0x06,0x0c] 0x00,0x0b,0xfa,0xd3,0x02,0x09,0x06,0x0c -# GFX940: v_smfmac_f32_16x16x64_fp8_bf8 a[0:3], v[2:3], a[4:7], v1 ; encoding: [0x00,0x80,0xfa,0xd3,0x02,0x09,0x06,0x14] +# GFX942: v_smfmac_f32_16x16x64_fp8_bf8 a[0:3], v[2:3], a[4:7], v1 ; encoding: [0x00,0x80,0xfa,0xd3,0x02,0x09,0x06,0x14] 0x00,0x80,0xfa,0xd3,0x02,0x09,0x06,0x14 -# GFX940: v_smfmac_f32_16x16x64_fp8_fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xfb,0xd3,0x02,0x09,0x06,0x0c] +# GFX942: v_smfmac_f32_16x16x64_fp8_fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xfb,0xd3,0x02,0x09,0x06,0x0c] 0x00,0x0b,0xfb,0xd3,0x02,0x09,0x06,0x0c -# GFX940: v_smfmac_f32_16x16x64_fp8_fp8 a[0:3], v[2:3], a[4:7], v1 ; encoding: [0x00,0x80,0xfb,0xd3,0x02,0x09,0x06,0x14] +# GFX942: v_smfmac_f32_16x16x64_fp8_fp8 a[0:3], v[2:3], a[4:7], v1 ; encoding: [0x00,0x80,0xfb,0xd3,0x02,0x09,0x06,0x14] 0x00,0x80,0xfb,0xd3,0x02,0x09,0x06,0x14 -# GFX940: v_smfmac_f32_32x32x32_bf8_bf8 v[0:15], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xfc,0xd3,0x02,0x09,0x06,0x0c] +# GFX942: v_smfmac_f32_32x32x32_bf8_bf8 v[0:15], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xfc,0xd3,0x02,0x09,0x06,0x0c] 0x00,0x0b,0xfc,0xd3,0x02,0x09,0x06,0x0c -# GFX940: v_smfmac_f32_32x32x32_bf8_bf8 a[0:15], v[2:3], a[4:7], v1 ; encoding: [0x00,0x80,0xfc,0xd3,0x02,0x09,0x06,0x14] +# GFX942: v_smfmac_f32_32x32x32_bf8_bf8 a[0:15], v[2:3], a[4:7], v1 ; encoding: [0x00,0x80,0xfc,0xd3,0x02,0x09,0x06,0x14] 0x00,0x80,0xfc,0xd3,0x02,0x09,0x06,0x14 -# GFX940: v_smfmac_f32_32x32x32_bf8_fp8 v[0:15], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xfd,0xd3,0x02,0x09,0x06,0x0c] +# GFX942: v_smfmac_f32_32x32x32_bf8_fp8 v[0:15], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xfd,0xd3,0x02,0x09,0x06,0x0c] 0x00,0x0b,0xfd,0xd3,0x02,0x09,0x06,0x0c -# GFX940: v_smfmac_f32_32x32x32_bf8_fp8 a[0:15], v[2:3], a[4:7], v1 ; encoding: [0x00,0x80,0xfd,0xd3,0x02,0x09,0x06,0x14] +# GFX942: v_smfmac_f32_32x32x32_bf8_fp8 a[0:15], v[2:3], a[4:7], v1 ; encoding: [0x00,0x80,0xfd,0xd3,0x02,0x09,0x06,0x14] 0x00,0x80,0xfd,0xd3,0x02,0x09,0x06,0x14 -# GFX940: v_smfmac_f32_32x32x32_fp8_bf8 v[0:15], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xfe,0xd3,0x02,0x09,0x06,0x0c] +# GFX942: v_smfmac_f32_32x32x32_fp8_bf8 v[0:15], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xfe,0xd3,0x02,0x09,0x06,0x0c] 0x00,0x0b,0xfe,0xd3,0x02,0x09,0x06,0x0c -# GFX940: v_smfmac_f32_32x32x32_fp8_bf8 a[0:15], v[2:3], a[4:7], v1 ; encoding: [0x00,0x80,0xfe,0xd3,0x02,0x09,0x06,0x14] +# GFX942: v_smfmac_f32_32x32x32_fp8_bf8 a[0:15], v[2:3], a[4:7], v1 ; encoding: [0x00,0x80,0xfe,0xd3,0x02,0x09,0x06,0x14] 0x00,0x80,0xfe,0xd3,0x02,0x09,0x06,0x14 -# GFX940: v_smfmac_f32_32x32x32_fp8_fp8 v[0:15], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xff,0xd3,0x02,0x09,0x06,0x0c] +# GFX942: v_smfmac_f32_32x32x32_fp8_fp8 v[0:15], a[2:3], v[4:7], v1 cbsz:3 abid:1 ; encoding: [0x00,0x0b,0xff,0xd3,0x02,0x09,0x06,0x0c] 0x00,0x0b,0xff,0xd3,0x02,0x09,0x06,0x0c -# GFX940: v_smfmac_f32_32x32x32_fp8_fp8 a[0:15], v[2:3], a[4:7], v1 ; encoding: [0x00,0x80,0xff,0xd3,0x02,0x09,0x06,0x14] +# GFX942: v_smfmac_f32_32x32x32_fp8_fp8 a[0:15], v[2:3], a[4:7], v1 ; encoding: [0x00,0x80,0xff,0xd3,0x02,0x09,0x06,0x14] 0x00,0x80,0xff,0xd3,0x02,0x09,0x06,0x14 -# GFX940: v_mfma_f32_16x16x16_f16 v[10:13], v[2:3], v[4:5], v[6:9] ; encoding: [0x0a,0x00,0xcd,0xd3,0x02,0x09,0x1a,0x04] +# GFX942: v_mfma_f32_16x16x16_f16 v[10:13], v[2:3], v[4:5], v[6:9] ; encoding: [0x0a,0x00,0xcd,0xd3,0x02,0x09,0x1a,0x04] 0x0a,0x00,0xcd,0xd3,0x02,0x09,0x1a,0x04 -# GFX940: v_mfma_f32_16x16x16_f16 v[252:255], a[254:255], v[254:255], v[252:255] ; encoding: [0xfc,0x00,0xcd,0xd3,0xfe,0xfd,0xf3,0x0f] +# GFX942: v_mfma_f32_16x16x16_f16 v[252:255], a[254:255], v[254:255], v[252:255] ; encoding: [0xfc,0x00,0xcd,0xd3,0xfe,0xfd,0xf3,0x0f] 0xfc,0x00,0xcd,0xd3,0xfe,0xfd,0xf3,0x0f -# GFX940: v_mfma_f32_16x16x16_f16 v[252:255], v[254:255], a[254:255], v[252:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xfc,0x3a,0xcd,0xd3,0xfe,0xfd,0xf3,0x77] +# GFX942: v_mfma_f32_16x16x16_f16 v[252:255], v[254:255], a[254:255], v[252:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xfc,0x3a,0xcd,0xd3,0xfe,0xfd,0xf3,0x77] 0xfc,0x3a,0xcd,0xd3,0xfe,0xfd,0xf3,0x77 -# GFX940: v_mfma_f32_16x16x16_f16 a[252:255], a[254:255], a[254:255], a[252:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xfc,0xff,0xcd,0xd3,0xfe,0xfd,0xf3,0xff] +# GFX942: v_mfma_f32_16x16x16_f16 a[252:255], a[254:255], a[254:255], a[252:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xfc,0xff,0xcd,0xd3,0xfe,0xfd,0xf3,0xff] 0xfc,0xff,0xcd,0xd3,0xfe,0xfd,0xf3,0xff -# GFX940: v_mfma_f32_16x16x1_4b_f32 v[240:255], v1, v2, v[240:255] ; encoding: [0xf0,0x00,0xc1,0xd3,0x01,0x05,0xc2,0x07] +# GFX942: v_mfma_f32_16x16x1_4b_f32 v[240:255], v1, v2, v[240:255] ; encoding: [0xf0,0x00,0xc1,0xd3,0x01,0x05,0xc2,0x07] 0xf0,0x00,0xc1,0xd3,0x01,0x05,0xc2,0x07 -# GFX940: v_mfma_f32_16x16x1_4b_f32 v[240:255], a1, v2, v[240:255] ; encoding: [0xf0,0x00,0xc1,0xd3,0x01,0x05,0xc2,0x0f] +# GFX942: v_mfma_f32_16x16x1_4b_f32 v[240:255], a1, v2, v[240:255] ; encoding: [0xf0,0x00,0xc1,0xd3,0x01,0x05,0xc2,0x0f] 0xf0,0x00,0xc1,0xd3,0x01,0x05,0xc2,0x0f -# GFX940: v_mfma_f32_16x16x1_4b_f32 v[240:255], v1, a2, v[240:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xf0,0x3a,0xc1,0xd3,0x01,0x05,0xc2,0x77] +# GFX942: v_mfma_f32_16x16x1_4b_f32 v[240:255], v1, a2, v[240:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xf0,0x3a,0xc1,0xd3,0x01,0x05,0xc2,0x77] 0xf0,0x3a,0xc1,0xd3,0x01,0x05,0xc2,0x77 -# GFX940: v_mfma_f32_16x16x1_4b_f32 a[240:255], a255, a255, a[240:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xf0,0xff,0xc1,0xd3,0xff,0xff,0xc3,0xff] +# GFX942: v_mfma_f32_16x16x1_4b_f32 a[240:255], a255, a255, a[240:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xf0,0xff,0xc1,0xd3,0xff,0xff,0xc3,0xff] 0xf0,0xff,0xc1,0xd3,0xff,0xff,0xc3,0xff -# GFX940: v_mfma_f32_16x16x4_4b_f16 v[240:255], v[2:3], v[4:5], v[240:255] ; encoding: [0xf0,0x00,0xc9,0xd3,0x02,0x09,0xc2,0x07] +# GFX942: v_mfma_f32_16x16x4_4b_f16 v[240:255], v[2:3], v[4:5], v[240:255] ; encoding: [0xf0,0x00,0xc9,0xd3,0x02,0x09,0xc2,0x07] 0xf0,0x00,0xc9,0xd3,0x02,0x09,0xc2,0x07 -# GFX940: v_mfma_f32_16x16x4_4b_f16 v[240:255], a[2:3], v[4:5], v[240:255] ; encoding: [0xf0,0x00,0xc9,0xd3,0x02,0x09,0xc2,0x0f] +# GFX942: v_mfma_f32_16x16x4_4b_f16 v[240:255], a[2:3], v[4:5], v[240:255] ; encoding: [0xf0,0x00,0xc9,0xd3,0x02,0x09,0xc2,0x0f] 0xf0,0x00,0xc9,0xd3,0x02,0x09,0xc2,0x0f -# GFX940: v_mfma_f32_16x16x4_4b_f16 v[240:255], v[2:3], a[4:5], v[240:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xf0,0x3a,0xc9,0xd3,0x02,0x09,0xc2,0x77] +# GFX942: v_mfma_f32_16x16x4_4b_f16 v[240:255], v[2:3], a[4:5], v[240:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xf0,0x3a,0xc9,0xd3,0x02,0x09,0xc2,0x77] 0xf0,0x3a,0xc9,0xd3,0x02,0x09,0xc2,0x77 -# GFX940: v_mfma_f32_16x16x4_4b_f16 a[240:255], a[254:255], a[254:255], a[240:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xf0,0xff,0xc9,0xd3,0xfe,0xfd,0xc3,0xff] +# GFX942: v_mfma_f32_16x16x4_4b_f16 a[240:255], a[254:255], a[254:255], a[240:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xf0,0xff,0xc9,0xd3,0xfe,0xfd,0xc3,0xff] 0xf0,0xff,0xc9,0xd3,0xfe,0xfd,0xc3,0xff -# GFX940: v_mfma_f32_16x16x4_f32 v[10:13], v1, v2, v[6:9] ; encoding: [0x0a,0x00,0xc5,0xd3,0x01,0x05,0x1a,0x04] +# GFX942: v_mfma_f32_16x16x4_f32 v[10:13], v1, v2, v[6:9] ; encoding: [0x0a,0x00,0xc5,0xd3,0x01,0x05,0x1a,0x04] 0x0a,0x00,0xc5,0xd3,0x01,0x05,0x1a,0x04 -# GFX940: v_mfma_f32_16x16x4_f32 v[252:255], a255, v255, v[252:255] ; encoding: [0xfc,0x00,0xc5,0xd3,0xff,0xff,0xf3,0x0f] +# GFX942: v_mfma_f32_16x16x4_f32 v[252:255], a255, v255, v[252:255] ; encoding: [0xfc,0x00,0xc5,0xd3,0xff,0xff,0xf3,0x0f] 0xfc,0x00,0xc5,0xd3,0xff,0xff,0xf3,0x0f -# GFX940: v_mfma_f32_16x16x4_f32 a[10:13], v1, a2, a[6:9] cbsz:2 abid:7 blgp:3 ; encoding: [0x0a,0xba,0xc5,0xd3,0x01,0x05,0x1a,0x74] +# GFX942: v_mfma_f32_16x16x4_f32 a[10:13], v1, a2, a[6:9] cbsz:2 abid:7 blgp:3 ; encoding: [0x0a,0xba,0xc5,0xd3,0x01,0x05,0x1a,0x74] 0x0a,0xba,0xc5,0xd3,0x01,0x05,0x1a,0x74 -# GFX940: v_mfma_f32_16x16x4_f32 a[252:255], a255, a255, a[252:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xfc,0xff,0xc5,0xd3,0xff,0xff,0xf3,0xff] +# GFX942: v_mfma_f32_16x16x4_f32 a[252:255], a255, a255, a[252:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xfc,0xff,0xc5,0xd3,0xff,0xff,0xf3,0xff] 0xfc,0xff,0xc5,0xd3,0xff,0xff,0xf3,0xff -# GFX940: v_mfma_f32_32x32x1_2b_f32 v[224:255], v1, v2, v[224:255] ; encoding: [0xe0,0x00,0xc0,0xd3,0x01,0x05,0x82,0x07] +# GFX942: v_mfma_f32_32x32x1_2b_f32 v[224:255], v1, v2, v[224:255] ; encoding: [0xe0,0x00,0xc0,0xd3,0x01,0x05,0x82,0x07] 0xe0,0x00,0xc0,0xd3,0x01,0x05,0x82,0x07 -# GFX940: v_mfma_f32_32x32x1_2b_f32 v[224:255], a1, v2, v[224:255] ; encoding: [0xe0,0x00,0xc0,0xd3,0x01,0x05,0x82,0x0f] +# GFX942: v_mfma_f32_32x32x1_2b_f32 v[224:255], a1, v2, v[224:255] ; encoding: [0xe0,0x00,0xc0,0xd3,0x01,0x05,0x82,0x0f] 0xe0,0x00,0xc0,0xd3,0x01,0x05,0x82,0x0f -# GFX940: v_mfma_f32_32x32x1_2b_f32 v[224:255], v1, a2, v[224:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xe0,0x3a,0xc0,0xd3,0x01,0x05,0x82,0x77] +# GFX942: v_mfma_f32_32x32x1_2b_f32 v[224:255], v1, a2, v[224:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xe0,0x3a,0xc0,0xd3,0x01,0x05,0x82,0x77] 0xe0,0x3a,0xc0,0xd3,0x01,0x05,0x82,0x77 -# GFX940: v_mfma_f32_32x32x1_2b_f32 a[224:255], a255, a255, a[224:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xe0,0xff,0xc0,0xd3,0xff,0xff,0x83,0xff] +# GFX942: v_mfma_f32_32x32x1_2b_f32 a[224:255], a255, a255, a[224:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xe0,0xff,0xc0,0xd3,0xff,0xff,0x83,0xff] 0xe0,0xff,0xc0,0xd3,0xff,0xff,0x83,0xff -# GFX940: v_mfma_f32_32x32x2_f32 v[240:255], v1, v2, v[240:255] ; encoding: [0xf0,0x00,0xc4,0xd3,0x01,0x05,0xc2,0x07] +# GFX942: v_mfma_f32_32x32x2_f32 v[240:255], v1, v2, v[240:255] ; encoding: [0xf0,0x00,0xc4,0xd3,0x01,0x05,0xc2,0x07] 0xf0,0x00,0xc4,0xd3,0x01,0x05,0xc2,0x07 -# GFX940: v_mfma_f32_32x32x2_f32 v[240:255], a1, v2, v[240:255] ; encoding: [0xf0,0x00,0xc4,0xd3,0x01,0x05,0xc2,0x0f] +# GFX942: v_mfma_f32_32x32x2_f32 v[240:255], a1, v2, v[240:255] ; encoding: [0xf0,0x00,0xc4,0xd3,0x01,0x05,0xc2,0x0f] 0xf0,0x00,0xc4,0xd3,0x01,0x05,0xc2,0x0f -# GFX940: v_mfma_f32_32x32x2_f32 v[240:255], v1, a2, v[240:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xf0,0x3a,0xc4,0xd3,0x01,0x05,0xc2,0x77] +# GFX942: v_mfma_f32_32x32x2_f32 v[240:255], v1, a2, v[240:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xf0,0x3a,0xc4,0xd3,0x01,0x05,0xc2,0x77] 0xf0,0x3a,0xc4,0xd3,0x01,0x05,0xc2,0x77 -# GFX940: v_mfma_f32_32x32x2_f32 a[240:255], a255, a255, a[240:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xf0,0xff,0xc4,0xd3,0xff,0xff,0xc3,0xff] +# GFX942: v_mfma_f32_32x32x2_f32 a[240:255], a255, a255, a[240:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xf0,0xff,0xc4,0xd3,0xff,0xff,0xc3,0xff] 0xf0,0xff,0xc4,0xd3,0xff,0xff,0xc3,0xff -# GFX940: v_mfma_f32_32x32x4_2b_f16 v[224:255], v[2:3], v[4:5], v[224:255] ; encoding: [0xe0,0x00,0xc8,0xd3,0x02,0x09,0x82,0x07] +# GFX942: v_mfma_f32_32x32x4_2b_f16 v[224:255], v[2:3], v[4:5], v[224:255] ; encoding: [0xe0,0x00,0xc8,0xd3,0x02,0x09,0x82,0x07] 0xe0,0x00,0xc8,0xd3,0x02,0x09,0x82,0x07 -# GFX940: v_mfma_f32_32x32x4_2b_f16 v[224:255], a[2:3], v[4:5], v[224:255] ; encoding: [0xe0,0x00,0xc8,0xd3,0x02,0x09,0x82,0x0f] +# GFX942: v_mfma_f32_32x32x4_2b_f16 v[224:255], a[2:3], v[4:5], v[224:255] ; encoding: [0xe0,0x00,0xc8,0xd3,0x02,0x09,0x82,0x0f] 0xe0,0x00,0xc8,0xd3,0x02,0x09,0x82,0x0f -# GFX940: v_mfma_f32_32x32x4_2b_f16 v[224:255], v[2:3], a[4:5], v[224:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xe0,0x3a,0xc8,0xd3,0x02,0x09,0x82,0x77] +# GFX942: v_mfma_f32_32x32x4_2b_f16 v[224:255], v[2:3], a[4:5], v[224:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xe0,0x3a,0xc8,0xd3,0x02,0x09,0x82,0x77] 0xe0,0x3a,0xc8,0xd3,0x02,0x09,0x82,0x77 -# GFX940: v_mfma_f32_32x32x4_2b_f16 a[224:255], a[254:255], a[254:255], a[224:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xe0,0xff,0xc8,0xd3,0xfe,0xfd,0x83,0xff] +# GFX942: v_mfma_f32_32x32x4_2b_f16 a[224:255], a[254:255], a[254:255], a[224:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xe0,0xff,0xc8,0xd3,0xfe,0xfd,0x83,0xff] 0xe0,0xff,0xc8,0xd3,0xfe,0xfd,0x83,0xff -# GFX940: v_mfma_f32_32x32x8_f16 v[240:255], v[2:3], v[4:5], v[240:255] ; encoding: [0xf0,0x00,0xcc,0xd3,0x02,0x09,0xc2,0x07] +# GFX942: v_mfma_f32_32x32x8_f16 v[240:255], v[2:3], v[4:5], v[240:255] ; encoding: [0xf0,0x00,0xcc,0xd3,0x02,0x09,0xc2,0x07] 0xf0,0x00,0xcc,0xd3,0x02,0x09,0xc2,0x07 -# GFX940: v_mfma_f32_32x32x8_f16 v[240:255], a[2:3], v[4:5], v[240:255] ; encoding: [0xf0,0x00,0xcc,0xd3,0x02,0x09,0xc2,0x0f] +# GFX942: v_mfma_f32_32x32x8_f16 v[240:255], a[2:3], v[4:5], v[240:255] ; encoding: [0xf0,0x00,0xcc,0xd3,0x02,0x09,0xc2,0x0f] 0xf0,0x00,0xcc,0xd3,0x02,0x09,0xc2,0x0f -# GFX940: v_mfma_f32_32x32x8_f16 v[240:255], v[2:3], a[4:5], v[240:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xf0,0x3a,0xcc,0xd3,0x02,0x09,0xc2,0x77] +# GFX942: v_mfma_f32_32x32x8_f16 v[240:255], v[2:3], a[4:5], v[240:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xf0,0x3a,0xcc,0xd3,0x02,0x09,0xc2,0x77] 0xf0,0x3a,0xcc,0xd3,0x02,0x09,0xc2,0x77 -# GFX940: v_mfma_f32_32x32x8_f16 a[240:255], a[254:255], a[254:255], a[240:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xf0,0xff,0xcc,0xd3,0xfe,0xfd,0xc3,0xff] +# GFX942: v_mfma_f32_32x32x8_f16 a[240:255], a[254:255], a[254:255], a[240:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xf0,0xff,0xcc,0xd3,0xfe,0xfd,0xc3,0xff] 0xf0,0xff,0xcc,0xd3,0xfe,0xfd,0xc3,0xff -# GFX940: v_mfma_f32_4x4x1_16b_f32 v[10:13], v1, v2, v[6:9] ; encoding: [0x0a,0x00,0xc2,0xd3,0x01,0x05,0x1a,0x04] +# GFX942: v_mfma_f32_4x4x1_16b_f32 v[10:13], v1, v2, v[6:9] ; encoding: [0x0a,0x00,0xc2,0xd3,0x01,0x05,0x1a,0x04] 0x0a,0x00,0xc2,0xd3,0x01,0x05,0x1a,0x04 -# GFX940: v_mfma_f32_4x4x1_16b_f32 v[252:255], a255, v255, v[252:255] ; encoding: [0xfc,0x00,0xc2,0xd3,0xff,0xff,0xf3,0x0f] +# GFX942: v_mfma_f32_4x4x1_16b_f32 v[252:255], a255, v255, v[252:255] ; encoding: [0xfc,0x00,0xc2,0xd3,0xff,0xff,0xf3,0x0f] 0xfc,0x00,0xc2,0xd3,0xff,0xff,0xf3,0x0f -# GFX940: v_mfma_f32_4x4x1_16b_f32 a[10:13], v1, a2, a[6:9] cbsz:2 abid:7 blgp:3 ; encoding: [0x0a,0xba,0xc2,0xd3,0x01,0x05,0x1a,0x74] +# GFX942: v_mfma_f32_4x4x1_16b_f32 a[10:13], v1, a2, a[6:9] cbsz:2 abid:7 blgp:3 ; encoding: [0x0a,0xba,0xc2,0xd3,0x01,0x05,0x1a,0x74] 0x0a,0xba,0xc2,0xd3,0x01,0x05,0x1a,0x74 -# GFX940: v_mfma_f32_4x4x1_16b_f32 a[252:255], a255, a255, a[252:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xfc,0xff,0xc2,0xd3,0xff,0xff,0xf3,0xff] +# GFX942: v_mfma_f32_4x4x1_16b_f32 a[252:255], a255, a255, a[252:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xfc,0xff,0xc2,0xd3,0xff,0xff,0xf3,0xff] 0xfc,0xff,0xc2,0xd3,0xff,0xff,0xf3,0xff -# GFX940: v_mfma_f32_4x4x4_16b_f16 v[10:13], v[2:3], v[4:5], v[6:9] ; encoding: [0x0a,0x00,0xca,0xd3,0x02,0x09,0x1a,0x04] +# GFX942: v_mfma_f32_4x4x4_16b_f16 v[10:13], v[2:3], v[4:5], v[6:9] ; encoding: [0x0a,0x00,0xca,0xd3,0x02,0x09,0x1a,0x04] 0x0a,0x00,0xca,0xd3,0x02,0x09,0x1a,0x04 -# GFX940: v_mfma_f32_4x4x4_16b_f16 v[252:255], a[254:255], v[254:255], v[252:255] ; encoding: [0xfc,0x00,0xca,0xd3,0xfe,0xfd,0xf3,0x0f] +# GFX942: v_mfma_f32_4x4x4_16b_f16 v[252:255], a[254:255], v[254:255], v[252:255] ; encoding: [0xfc,0x00,0xca,0xd3,0xfe,0xfd,0xf3,0x0f] 0xfc,0x00,0xca,0xd3,0xfe,0xfd,0xf3,0x0f -# GFX940: v_mfma_f32_4x4x4_16b_f16 a[10:13], v[2:3], a[4:5], a[6:9] cbsz:2 abid:7 blgp:3 ; encoding: [0x0a,0xba,0xca,0xd3,0x02,0x09,0x1a,0x74] +# GFX942: v_mfma_f32_4x4x4_16b_f16 a[10:13], v[2:3], a[4:5], a[6:9] cbsz:2 abid:7 blgp:3 ; encoding: [0x0a,0xba,0xca,0xd3,0x02,0x09,0x1a,0x74] 0x0a,0xba,0xca,0xd3,0x02,0x09,0x1a,0x74 -# GFX940: v_mfma_f32_4x4x4_16b_f16 a[252:255], a[254:255], a[254:255], a[252:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xfc,0xff,0xca,0xd3,0xfe,0xfd,0xf3,0xff] +# GFX942: v_mfma_f32_4x4x4_16b_f16 a[252:255], a[254:255], a[254:255], a[252:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xfc,0xff,0xca,0xd3,0xfe,0xfd,0xf3,0xff] 0xfc,0xff,0xca,0xd3,0xfe,0xfd,0xf3,0xff -# GFX940: v_mfma_i32_16x16x4_4b_i8 v[240:255], a1, a2, v[240:255] ; encoding: [0xf0,0x00,0xd1,0xd3,0x01,0x05,0xc2,0x1f] +# GFX942: v_mfma_i32_16x16x4_4b_i8 v[240:255], a1, a2, v[240:255] ; encoding: [0xf0,0x00,0xd1,0xd3,0x01,0x05,0xc2,0x1f] 0xf0,0x00,0xd1,0xd3,0x01,0x05,0xc2,0x1f -# GFX940: v_mfma_i32_16x16x4_4b_i8 v[240:255], v1, a2, v[240:255] ; encoding: [0xf0,0x00,0xd1,0xd3,0x01,0x05,0xc2,0x17] +# GFX942: v_mfma_i32_16x16x4_4b_i8 v[240:255], v1, a2, v[240:255] ; encoding: [0xf0,0x00,0xd1,0xd3,0x01,0x05,0xc2,0x17] 0xf0,0x00,0xd1,0xd3,0x01,0x05,0xc2,0x17 -# GFX940: v_mfma_i32_16x16x4_4b_i8 v[240:255], a1, v2, v[240:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xf0,0x3a,0xd1,0xd3,0x01,0x05,0xc2,0x6f] +# GFX942: v_mfma_i32_16x16x4_4b_i8 v[240:255], a1, v2, v[240:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xf0,0x3a,0xd1,0xd3,0x01,0x05,0xc2,0x6f] 0xf0,0x3a,0xd1,0xd3,0x01,0x05,0xc2,0x6f -# GFX940: v_mfma_i32_16x16x4_4b_i8 a[240:255], a255, a255, a[240:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xf0,0xff,0xd1,0xd3,0xff,0xff,0xc3,0xff] +# GFX942: v_mfma_i32_16x16x4_4b_i8 a[240:255], a255, a255, a[240:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xf0,0xff,0xd1,0xd3,0xff,0xff,0xc3,0xff] 0xf0,0xff,0xd1,0xd3,0xff,0xff,0xc3,0xff -# GFX940: v_mfma_i32_32x32x4_2b_i8 v[224:255], v1, v2, v[224:255] ; encoding: [0xe0,0x00,0xd0,0xd3,0x01,0x05,0x82,0x07] +# GFX942: v_mfma_i32_32x32x4_2b_i8 v[224:255], v1, v2, v[224:255] ; encoding: [0xe0,0x00,0xd0,0xd3,0x01,0x05,0x82,0x07] 0xe0,0x00,0xd0,0xd3,0x01,0x05,0x82,0x07 -# GFX940: v_mfma_i32_32x32x4_2b_i8 v[224:255], v1, a2, v[224:255] ; encoding: [0xe0,0x00,0xd0,0xd3,0x01,0x05,0x82,0x17] +# GFX942: v_mfma_i32_32x32x4_2b_i8 v[224:255], v1, a2, v[224:255] ; encoding: [0xe0,0x00,0xd0,0xd3,0x01,0x05,0x82,0x17] 0xe0,0x00,0xd0,0xd3,0x01,0x05,0x82,0x17 -# GFX940: v_mfma_i32_32x32x4_2b_i8 v[224:255], a1, v2, v[224:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xe0,0x3a,0xd0,0xd3,0x01,0x05,0x82,0x6f] +# GFX942: v_mfma_i32_32x32x4_2b_i8 v[224:255], a1, v2, v[224:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xe0,0x3a,0xd0,0xd3,0x01,0x05,0x82,0x6f] 0xe0,0x3a,0xd0,0xd3,0x01,0x05,0x82,0x6f -# GFX940: v_mfma_i32_32x32x4_2b_i8 a[224:255], a255, a255, a[224:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xe0,0xff,0xd0,0xd3,0xff,0xff,0x83,0xff] +# GFX942: v_mfma_i32_32x32x4_2b_i8 a[224:255], a255, a255, a[224:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xe0,0xff,0xd0,0xd3,0xff,0xff,0x83,0xff] 0xe0,0xff,0xd0,0xd3,0xff,0xff,0x83,0xff -# GFX940: v_mfma_i32_4x4x4_16b_i8 v[10:13], v1, v2, v[6:9] ; encoding: [0x0a,0x00,0xd2,0xd3,0x01,0x05,0x1a,0x04] +# GFX942: v_mfma_i32_4x4x4_16b_i8 v[10:13], v1, v2, v[6:9] ; encoding: [0x0a,0x00,0xd2,0xd3,0x01,0x05,0x1a,0x04] 0x0a,0x00,0xd2,0xd3,0x01,0x05,0x1a,0x04 -# GFX940: v_mfma_i32_4x4x4_16b_i8 v[252:255], a255, v255, v[252:255] ; encoding: [0xfc,0x00,0xd2,0xd3,0xff,0xff,0xf3,0x0f] +# GFX942: v_mfma_i32_4x4x4_16b_i8 v[252:255], a255, v255, v[252:255] ; encoding: [0xfc,0x00,0xd2,0xd3,0xff,0xff,0xf3,0x0f] 0xfc,0x00,0xd2,0xd3,0xff,0xff,0xf3,0x0f -# GFX940: v_mfma_i32_4x4x4_16b_i8 a[10:13], v1, a2, a[6:9] cbsz:2 abid:7 blgp:3 ; encoding: [0x0a,0xba,0xd2,0xd3,0x01,0x05,0x1a,0x74] +# GFX942: v_mfma_i32_4x4x4_16b_i8 a[10:13], v1, a2, a[6:9] cbsz:2 abid:7 blgp:3 ; encoding: [0x0a,0xba,0xd2,0xd3,0x01,0x05,0x1a,0x74] 0x0a,0xba,0xd2,0xd3,0x01,0x05,0x1a,0x74 -# GFX940: v_mfma_i32_4x4x4_16b_i8 a[252:255], a255, a255, a[252:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xfc,0xff,0xd2,0xd3,0xff,0xff,0xf3,0xff] +# GFX942: v_mfma_i32_4x4x4_16b_i8 a[252:255], a255, a255, a[252:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xfc,0xff,0xd2,0xd3,0xff,0xff,0xf3,0xff] 0xfc,0xff,0xd2,0xd3,0xff,0xff,0xf3,0xff diff --git a/llvm/test/MachineVerifier/AMDGPU/writelane_m0.mir b/llvm/test/MachineVerifier/AMDGPU/writelane_m0.mir index 2db6f47a42234..e3f7a144a88f6 100644 --- a/llvm/test/MachineVerifier/AMDGPU/writelane_m0.mir +++ b/llvm/test/MachineVerifier/AMDGPU/writelane_m0.mir @@ -1,7 +1,7 @@ # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -verify-machineinstrs -run-pass=none -o - %s | FileCheck %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs -run-pass=none -o - %s | FileCheck %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 -verify-machineinstrs -run-pass=none -o - %s | FileCheck %s -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -verify-machineinstrs -run-pass=none -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -verify-machineinstrs -run-pass=none -o - %s | FileCheck %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs -run-pass=none -o - %s | FileCheck %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -verify-machineinstrs -run-pass=none -o - %s | FileCheck %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -run-pass=none -o - %s | FileCheck %s diff --git a/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml b/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml index 416419b3a333f..8d5ce006e24dc 100644 --- a/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml +++ b/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml @@ -150,14 +150,6 @@ # RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX90C | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX90C %s # RUN: obj2yaml %t.o.AMDGCN_GFX90C | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX90C %s -# RUN: sed -e 's//64/' -e 's//AMDGCN_GFX940/' %s | yaml2obj -o %t.o.AMDGCN_GFX940 -# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX940 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX940 %s -# RUN: obj2yaml %t.o.AMDGCN_GFX940 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX940 %s - -# RUN: sed -e 's//64/' -e 's//AMDGCN_GFX941/' %s | yaml2obj -o %t.o.AMDGCN_GFX941 -# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX941 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX941 %s -# RUN: obj2yaml %t.o.AMDGCN_GFX941 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX941 %s - # RUN: sed -e 's//64/' -e 's//AMDGCN_GFX942/' %s | yaml2obj -o %t.o.AMDGCN_GFX942 # RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX942 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX942 %s # RUN: obj2yaml %t.o.AMDGCN_GFX942 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX942 %s @@ -406,12 +398,6 @@ # ELF-AMDGCN-GFX90C: EF_AMDGPU_MACH_AMDGCN_GFX90C (0x32) # YAML-AMDGCN-GFX90C: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX90C ] -# ELF-AMDGCN-GFX940: EF_AMDGPU_MACH_AMDGCN_GFX940 (0x40) -# YAML-AMDGCN-GFX940: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX940 ] - -# ELF-AMDGCN-GFX941: EF_AMDGPU_MACH_AMDGCN_GFX941 (0x4B) -# YAML-AMDGCN-GFX941: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX941 ] - # ELF-AMDGCN-GFX942: EF_AMDGPU_MACH_AMDGCN_GFX942 (0x4C) # YAML-AMDGCN-GFX942: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX942 ] diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f32-agent.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f32-agent.ll index 69cf9697d4b30..bfc9be32ad5fa 100644 --- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f32-agent.ll +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f32-agent.ll @@ -3,7 +3,7 @@ ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX906 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX908 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX90A %s -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX940 %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX942 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX10 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX11 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX12 %s @@ -129,10 +129,10 @@ define float @test_atomicrmw_fadd_f32_global_agent(ptr addrspace(1) %ptr, float ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP5]] ; -; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_agent( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4 -; GFX940-NEXT: ret float [[RES]] +; GFX942-LABEL: define float @test_atomicrmw_fadd_f32_global_agent( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4 +; GFX942-NEXT: ret float [[RES]] ; ; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_agent( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -246,10 +246,10 @@ define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memor ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP5]] ; -; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] -; GFX940-NEXT: ret float [[RES]] +; GFX942-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] +; GFX942-NEXT: ret float [[RES]] ; ; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -351,10 +351,10 @@ define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_remote_memory(ptr ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP5]] ; -; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret float [[RES]] +; GFX942-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret float [[RES]] ; ; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -468,10 +468,10 @@ define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memor ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP5]] ; -; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret float [[RES]] +; GFX942-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret float [[RES]] ; ; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -573,10 +573,10 @@ define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memor ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP5]] ; -; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret float [[RES]] +; GFX942-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret float [[RES]] ; ; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { @@ -678,10 +678,10 @@ define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memor ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP5]] ; -; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret float [[RES]] +; GFX942-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret float [[RES]] ; ; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { @@ -783,10 +783,10 @@ define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode( ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP5]] ; -; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret float [[RES]] +; GFX942-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret float [[RES]] ; ; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -888,10 +888,10 @@ define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode_ ; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] ; GFX90A-NEXT: ret float [[TMP5]] ; -; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret float [[RES]] +; GFX942-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret float [[RES]] ; ; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -993,10 +993,10 @@ define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode_ ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP5]] ; -; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret float [[RES]] +; GFX942-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret float [[RES]] ; ; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -1098,10 +1098,10 @@ define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode_ ; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] ; GFX90A-NEXT: ret float [[TMP5]] ; -; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret float [[RES]] +; GFX942-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret float [[RES]] ; ; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -1191,10 +1191,10 @@ define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode_ ; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] ; GFX90A-NEXT: ret float [[TMP5]] ; -; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret float [[RES]] +; GFX942-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret float [[RES]] ; ; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { @@ -1284,10 +1284,10 @@ define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode_ ; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] ; GFX90A-NEXT: ret float [[TMP5]] ; -; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret float [[RES]] +; GFX942-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret float [[RES]] ; ; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { @@ -1393,10 +1393,10 @@ define void @test_atomicrmw_fadd_noret_f32_global_agent(ptr addrspace(1) %ptr, f ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4 -; GFX940-NEXT: ret void +; GFX942-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4 +; GFX942-NEXT: ret void ; ; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -1510,10 +1510,10 @@ define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_ ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] -; GFX940-NEXT: ret void +; GFX942-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] +; GFX942-NEXT: ret void ; ; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -1615,10 +1615,10 @@ define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_remote_memory ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret void +; GFX942-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret void ; ; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -1732,10 +1732,10 @@ define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_ ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret void +; GFX942-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret void ; ; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -1837,10 +1837,10 @@ define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_ ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret void +; GFX942-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret void ; ; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { @@ -1942,10 +1942,10 @@ define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_ ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret void +; GFX942-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret void ; ; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { @@ -2047,10 +2047,10 @@ define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_ ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret void +; GFX942-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret void ; ; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -2140,10 +2140,10 @@ define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_ ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret void +; GFX942-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret void ; ; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -2245,10 +2245,10 @@ define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_ ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret void +; GFX942-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret void ; ; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -2338,10 +2338,10 @@ define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_ ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret void +; GFX942-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret void ; ; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -2419,10 +2419,10 @@ define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_ ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret void +; GFX942-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret void ; ; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { @@ -2500,10 +2500,10 @@ define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_ ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret void +; GFX942-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret void ; ; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { @@ -2789,22 +2789,22 @@ define float @test_atomicrmw_fmax_f32_global_agent(ptr addrspace(1) %ptr, float ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP6]] ; -; GFX940-LABEL: define float @test_atomicrmw_fmax_f32_global_agent( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 -; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] -; GFX940: atomicrmw.start: -; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] -; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) -; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 -; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 -; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4 -; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 -; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 -; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float -; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] -; GFX940: atomicrmw.end: -; GFX940-NEXT: ret float [[TMP6]] +; GFX942-LABEL: define float @test_atomicrmw_fmax_f32_global_agent( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 +; GFX942-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX942: atomicrmw.start: +; GFX942-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX942-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) +; GFX942-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 +; GFX942-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX942-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4 +; GFX942-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX942-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX942-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX942-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX942: atomicrmw.end: +; GFX942-NEXT: ret float [[TMP6]] ; ; GFX10-LABEL: define float @test_atomicrmw_fmax_f32_global_agent( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -2918,22 +2918,22 @@ define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memor ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP6]] ; -; GFX940-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 -; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] -; GFX940: atomicrmw.start: -; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] -; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) -; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 -; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 -; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] -; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 -; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 -; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float -; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] -; GFX940: atomicrmw.end: -; GFX940-NEXT: ret float [[TMP6]] +; GFX942-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 +; GFX942-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX942: atomicrmw.start: +; GFX942-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX942-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) +; GFX942-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 +; GFX942-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX942-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] +; GFX942-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX942-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX942-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX942-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX942: atomicrmw.end: +; GFX942-NEXT: ret float [[TMP6]] ; ; GFX10-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -3023,22 +3023,22 @@ define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_remote_memory(ptr ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP6]] ; -; GFX940-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 -; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] -; GFX940: atomicrmw.start: -; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] -; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) -; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 -; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 -; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 -; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 -; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float -; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] -; GFX940: atomicrmw.end: -; GFX940-NEXT: ret float [[TMP6]] +; GFX942-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 +; GFX942-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX942: atomicrmw.start: +; GFX942-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX942-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) +; GFX942-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 +; GFX942-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX942-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX942-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX942-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX942-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX942: atomicrmw.end: +; GFX942-NEXT: ret float [[TMP6]] ; ; GFX10-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -3152,22 +3152,22 @@ define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memor ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP6]] ; -; GFX940-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 -; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] -; GFX940: atomicrmw.start: -; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] -; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) -; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 -; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 -; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 -; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 -; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float -; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] -; GFX940: atomicrmw.end: -; GFX940-NEXT: ret float [[TMP6]] +; GFX942-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 +; GFX942-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX942: atomicrmw.start: +; GFX942-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX942-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) +; GFX942-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 +; GFX942-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX942-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX942-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX942-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX942-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX942: atomicrmw.end: +; GFX942-NEXT: ret float [[TMP6]] ; ; GFX10-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -3257,22 +3257,22 @@ define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode( ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP6]] ; -; GFX940-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 -; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] -; GFX940: atomicrmw.start: -; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] -; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) -; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 -; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 -; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4 -; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 -; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 -; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float -; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] -; GFX940: atomicrmw.end: -; GFX940-NEXT: ret float [[TMP6]] +; GFX942-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 +; GFX942-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX942: atomicrmw.start: +; GFX942-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX942-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) +; GFX942-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 +; GFX942-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX942-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4 +; GFX942-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX942-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX942-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX942-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX942: atomicrmw.end: +; GFX942-NEXT: ret float [[TMP6]] ; ; GFX10-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -3386,22 +3386,22 @@ define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode_ ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP6]] ; -; GFX940-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 -; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] -; GFX940: atomicrmw.start: -; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] -; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) -; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 -; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 -; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] -; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 -; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 -; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float -; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] -; GFX940: atomicrmw.end: -; GFX940-NEXT: ret float [[TMP6]] +; GFX942-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 +; GFX942-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX942: atomicrmw.start: +; GFX942-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX942-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) +; GFX942-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 +; GFX942-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX942-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] +; GFX942-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX942-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX942-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX942-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX942: atomicrmw.end: +; GFX942-NEXT: ret float [[TMP6]] ; ; GFX10-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -3491,22 +3491,22 @@ define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode_ ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP6]] ; -; GFX940-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 -; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] -; GFX940: atomicrmw.start: -; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] -; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) -; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 -; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 -; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 -; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 -; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float -; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] -; GFX940: atomicrmw.end: -; GFX940-NEXT: ret float [[TMP6]] +; GFX942-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 +; GFX942-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX942: atomicrmw.start: +; GFX942-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX942-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) +; GFX942-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 +; GFX942-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX942-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX942-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX942-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX942-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX942: atomicrmw.end: +; GFX942-NEXT: ret float [[TMP6]] ; ; GFX10-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -3620,22 +3620,22 @@ define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode_ ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP6]] ; -; GFX940-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 -; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] -; GFX940: atomicrmw.start: -; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] -; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) -; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 -; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 -; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 -; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 -; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float -; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] -; GFX940: atomicrmw.end: -; GFX940-NEXT: ret float [[TMP6]] +; GFX942-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 +; GFX942-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX942: atomicrmw.start: +; GFX942-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX942-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) +; GFX942-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 +; GFX942-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX942-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX942-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX942-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX942-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX942: atomicrmw.end: +; GFX942-NEXT: ret float [[TMP6]] ; ; GFX10-LABEL: define float @test_atomicrmw_fmax_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -3729,22 +3729,22 @@ define float @test_atomicrmw_fmin_f32_global_agent(ptr addrspace(1) %ptr, float ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP6]] ; -; GFX940-LABEL: define float @test_atomicrmw_fmin_f32_global_agent( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 -; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] -; GFX940: atomicrmw.start: -; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] -; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) -; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 -; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 -; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4 -; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 -; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 -; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float -; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] -; GFX940: atomicrmw.end: -; GFX940-NEXT: ret float [[TMP6]] +; GFX942-LABEL: define float @test_atomicrmw_fmin_f32_global_agent( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 +; GFX942-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX942: atomicrmw.start: +; GFX942-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX942-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) +; GFX942-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 +; GFX942-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX942-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4 +; GFX942-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX942-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX942-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX942-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX942: atomicrmw.end: +; GFX942-NEXT: ret float [[TMP6]] ; ; GFX10-LABEL: define float @test_atomicrmw_fmin_f32_global_agent( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -3858,22 +3858,22 @@ define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memor ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP6]] ; -; GFX940-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 -; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] -; GFX940: atomicrmw.start: -; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] -; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) -; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 -; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 -; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] -; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 -; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 -; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float -; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] -; GFX940: atomicrmw.end: -; GFX940-NEXT: ret float [[TMP6]] +; GFX942-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 +; GFX942-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX942: atomicrmw.start: +; GFX942-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX942-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) +; GFX942-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 +; GFX942-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX942-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] +; GFX942-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX942-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX942-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX942-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX942: atomicrmw.end: +; GFX942-NEXT: ret float [[TMP6]] ; ; GFX10-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -3963,22 +3963,22 @@ define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_remote_memory(ptr ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP6]] ; -; GFX940-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 -; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] -; GFX940: atomicrmw.start: -; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] -; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) -; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 -; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 -; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 -; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 -; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float -; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] -; GFX940: atomicrmw.end: -; GFX940-NEXT: ret float [[TMP6]] +; GFX942-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 +; GFX942-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX942: atomicrmw.start: +; GFX942-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX942-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) +; GFX942-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 +; GFX942-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX942-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX942-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX942-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX942-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX942: atomicrmw.end: +; GFX942-NEXT: ret float [[TMP6]] ; ; GFX10-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -4092,22 +4092,22 @@ define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memor ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP6]] ; -; GFX940-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 -; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] -; GFX940: atomicrmw.start: -; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] -; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) -; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 -; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 -; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 -; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 -; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float -; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] -; GFX940: atomicrmw.end: -; GFX940-NEXT: ret float [[TMP6]] +; GFX942-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 +; GFX942-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX942: atomicrmw.start: +; GFX942-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX942-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) +; GFX942-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 +; GFX942-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX942-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX942-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX942-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX942-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX942: atomicrmw.end: +; GFX942-NEXT: ret float [[TMP6]] ; ; GFX10-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -4197,22 +4197,22 @@ define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode( ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP6]] ; -; GFX940-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 -; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] -; GFX940: atomicrmw.start: -; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] -; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) -; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 -; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 -; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4 -; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 -; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 -; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float -; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] -; GFX940: atomicrmw.end: -; GFX940-NEXT: ret float [[TMP6]] +; GFX942-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 +; GFX942-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX942: atomicrmw.start: +; GFX942-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX942-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) +; GFX942-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 +; GFX942-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX942-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4 +; GFX942-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX942-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX942-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX942-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX942: atomicrmw.end: +; GFX942-NEXT: ret float [[TMP6]] ; ; GFX10-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -4326,22 +4326,22 @@ define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode_ ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP6]] ; -; GFX940-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 -; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] -; GFX940: atomicrmw.start: -; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] -; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) -; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 -; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 -; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] -; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 -; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 -; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float -; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] -; GFX940: atomicrmw.end: -; GFX940-NEXT: ret float [[TMP6]] +; GFX942-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 +; GFX942-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX942: atomicrmw.start: +; GFX942-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX942-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) +; GFX942-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 +; GFX942-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX942-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] +; GFX942-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX942-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX942-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX942-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX942: atomicrmw.end: +; GFX942-NEXT: ret float [[TMP6]] ; ; GFX10-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -4431,22 +4431,22 @@ define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode_ ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP6]] ; -; GFX940-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 -; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] -; GFX940: atomicrmw.start: -; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] -; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) -; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 -; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 -; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 -; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 -; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float -; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] -; GFX940: atomicrmw.end: -; GFX940-NEXT: ret float [[TMP6]] +; GFX942-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 +; GFX942-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX942: atomicrmw.start: +; GFX942-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX942-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) +; GFX942-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 +; GFX942-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX942-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX942-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX942-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX942-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX942: atomicrmw.end: +; GFX942-NEXT: ret float [[TMP6]] ; ; GFX10-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -4560,22 +4560,22 @@ define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode_ ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP6]] ; -; GFX940-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 -; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] -; GFX940: atomicrmw.start: -; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] -; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) -; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 -; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 -; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 -; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 -; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float -; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] -; GFX940: atomicrmw.end: -; GFX940-NEXT: ret float [[TMP6]] +; GFX942-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 +; GFX942-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX942: atomicrmw.start: +; GFX942-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX942-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) +; GFX942-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 +; GFX942-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX942-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX942-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX942-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX942-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX942: atomicrmw.end: +; GFX942-NEXT: ret float [[TMP6]] ; ; GFX10-LABEL: define float @test_atomicrmw_fmin_f32_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -4609,7 +4609,7 @@ attributes #1 = { "denormal-fp-mode-f32"="dynamic,dynamic" } ;. ; GFX90A: [[META0]] = !{} ;. -; GFX940: [[META0]] = !{} +; GFX942: [[META0]] = !{} ;. ; GFX10: [[META0]] = !{} ;. diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f32-system.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f32-system.ll index 0976022825ced..dc751c255f263 100644 --- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f32-system.ll +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f32-system.ll @@ -3,7 +3,7 @@ ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX906 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX908 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX90A %s -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX940 %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX942 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX10 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX11 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX12 %s @@ -151,10 +151,10 @@ define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memo ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP5]] ; -; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] -; GFX940-NEXT: ret float [[RES]] +; GFX942-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] +; GFX942-NEXT: ret float [[RES]] ; ; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -256,10 +256,10 @@ define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_remote_memory(ptr ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP5]] ; -; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret float [[RES]] +; GFX942-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret float [[RES]] ; ; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -373,10 +373,10 @@ define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memo ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP5]] ; -; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret float [[RES]] +; GFX942-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret float [[RES]] ; ; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -478,10 +478,10 @@ define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memo ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP5]] ; -; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret float [[RES]] +; GFX942-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret float [[RES]] ; ; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { @@ -583,10 +583,10 @@ define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memo ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP5]] ; -; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret float [[RES]] +; GFX942-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret float [[RES]] ; ; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { @@ -698,10 +698,10 @@ define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode ; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] ; GFX90A-NEXT: ret float [[TMP5]] ; -; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret float [[RES]] +; GFX942-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret float [[RES]] ; ; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -803,10 +803,10 @@ define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP5]] ; -; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret float [[RES]] +; GFX942-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret float [[RES]] ; ; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -908,10 +908,10 @@ define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode ; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] ; GFX90A-NEXT: ret float [[TMP5]] ; -; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret float [[RES]] +; GFX942-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret float [[RES]] ; ; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -1001,10 +1001,10 @@ define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode ; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] ; GFX90A-NEXT: ret float [[TMP5]] ; -; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret float [[RES]] +; GFX942-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret float [[RES]] ; ; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { @@ -1094,10 +1094,10 @@ define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode ; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] ; GFX90A-NEXT: ret float [[TMP5]] ; -; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret float [[RES]] +; GFX942-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret float [[RES]] ; ; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { @@ -1225,10 +1225,10 @@ define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] -; GFX940-NEXT: ret void +; GFX942-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] +; GFX942-NEXT: ret void ; ; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -1330,10 +1330,10 @@ define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_remote_memor ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret void +; GFX942-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret void ; ; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -1447,10 +1447,10 @@ define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret void +; GFX942-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret void ; ; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -1552,10 +1552,10 @@ define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret void +; GFX942-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret void ; ; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_daz( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { @@ -1657,10 +1657,10 @@ define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret void +; GFX942-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret void ; ; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f32_dynamic( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { @@ -1760,10 +1760,10 @@ define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret void +; GFX942-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret void ; ; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -1865,10 +1865,10 @@ define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret void +; GFX942-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret void ; ; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -1958,10 +1958,10 @@ define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret void +; GFX942-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret void ; ; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -2039,10 +2039,10 @@ define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret void +; GFX942-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret void ; ; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR1]] { @@ -2120,10 +2120,10 @@ define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret void +; GFX942-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret void ; ; GFX10-LABEL: define void @test_atomicrmw_fadd_noret_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR2]] { @@ -2431,22 +2431,22 @@ define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_fine_grained_memo ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP6]] ; -; GFX940-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_fine_grained_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 -; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] -; GFX940: atomicrmw.start: -; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] -; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) -; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 -; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 -; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] -; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 -; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 -; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float -; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] -; GFX940: atomicrmw.end: -; GFX940-NEXT: ret float [[TMP6]] +; GFX942-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_fine_grained_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 +; GFX942-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX942: atomicrmw.start: +; GFX942-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX942-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) +; GFX942-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 +; GFX942-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX942-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] +; GFX942-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX942-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX942-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX942-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX942: atomicrmw.end: +; GFX942-NEXT: ret float [[TMP6]] ; ; GFX10-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_fine_grained_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -2536,22 +2536,22 @@ define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_remote_memory(ptr ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP6]] ; -; GFX940-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 -; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] -; GFX940: atomicrmw.start: -; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] -; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) -; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 -; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 -; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 -; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 -; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float -; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] -; GFX940: atomicrmw.end: -; GFX940-NEXT: ret float [[TMP6]] +; GFX942-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 +; GFX942-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX942: atomicrmw.start: +; GFX942-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX942-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) +; GFX942-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 +; GFX942-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX942-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX942-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX942-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX942-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX942: atomicrmw.end: +; GFX942-NEXT: ret float [[TMP6]] ; ; GFX10-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -2665,22 +2665,22 @@ define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_fine_grained_memo ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP6]] ; -; GFX940-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 -; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] -; GFX940: atomicrmw.start: -; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] -; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) -; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 -; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 -; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 -; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 -; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float -; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] -; GFX940: atomicrmw.end: -; GFX940-NEXT: ret float [[TMP6]] +; GFX942-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 +; GFX942-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX942: atomicrmw.start: +; GFX942-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX942-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) +; GFX942-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 +; GFX942-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX942-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX942-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX942-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX942-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX942: atomicrmw.end: +; GFX942-NEXT: ret float [[TMP6]] ; ; GFX10-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -2792,22 +2792,22 @@ define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP6]] ; -; GFX940-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 -; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] -; GFX940: atomicrmw.start: -; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] -; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) -; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 -; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 -; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] -; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 -; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 -; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float -; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] -; GFX940: atomicrmw.end: -; GFX940-NEXT: ret float [[TMP6]] +; GFX942-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 +; GFX942-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX942: atomicrmw.start: +; GFX942-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX942-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) +; GFX942-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 +; GFX942-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX942-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] +; GFX942-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX942-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX942-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX942-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX942: atomicrmw.end: +; GFX942-NEXT: ret float [[TMP6]] ; ; GFX10-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -2897,22 +2897,22 @@ define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP6]] ; -; GFX940-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 -; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] -; GFX940: atomicrmw.start: -; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] -; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) -; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 -; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 -; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 -; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 -; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float -; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] -; GFX940: atomicrmw.end: -; GFX940-NEXT: ret float [[TMP6]] +; GFX942-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 +; GFX942-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX942: atomicrmw.start: +; GFX942-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX942-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) +; GFX942-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 +; GFX942-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX942-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX942-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX942-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX942-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX942: atomicrmw.end: +; GFX942-NEXT: ret float [[TMP6]] ; ; GFX10-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -3026,22 +3026,22 @@ define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP6]] ; -; GFX940-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 -; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] -; GFX940: atomicrmw.start: -; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] -; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) -; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 -; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 -; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 -; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 -; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float -; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] -; GFX940: atomicrmw.end: -; GFX940-NEXT: ret float [[TMP6]] +; GFX942-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 +; GFX942-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX942: atomicrmw.start: +; GFX942-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX942-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) +; GFX942-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 +; GFX942-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX942-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX942-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX942-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX942-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX942: atomicrmw.end: +; GFX942-NEXT: ret float [[TMP6]] ; ; GFX10-LABEL: define float @test_atomicrmw_fmax_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -3157,22 +3157,22 @@ define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_fine_grained_memo ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP6]] ; -; GFX940-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_fine_grained_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 -; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] -; GFX940: atomicrmw.start: -; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] -; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) -; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 -; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 -; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] -; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 -; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 -; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float -; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] -; GFX940: atomicrmw.end: -; GFX940-NEXT: ret float [[TMP6]] +; GFX942-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_fine_grained_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 +; GFX942-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX942: atomicrmw.start: +; GFX942-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX942-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) +; GFX942-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 +; GFX942-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX942-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] +; GFX942-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX942-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX942-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX942-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX942: atomicrmw.end: +; GFX942-NEXT: ret float [[TMP6]] ; ; GFX10-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_fine_grained_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -3262,22 +3262,22 @@ define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_remote_memory(ptr ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP6]] ; -; GFX940-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 -; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] -; GFX940: atomicrmw.start: -; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] -; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) -; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 -; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 -; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 -; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 -; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float -; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] -; GFX940: atomicrmw.end: -; GFX940-NEXT: ret float [[TMP6]] +; GFX942-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 +; GFX942-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX942: atomicrmw.start: +; GFX942-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX942-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) +; GFX942-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 +; GFX942-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX942-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX942-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX942-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX942-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX942: atomicrmw.end: +; GFX942-NEXT: ret float [[TMP6]] ; ; GFX10-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -3391,22 +3391,22 @@ define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_fine_grained_memo ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP6]] ; -; GFX940-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 -; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] -; GFX940: atomicrmw.start: -; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] -; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) -; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 -; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 -; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 -; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 -; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float -; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] -; GFX940: atomicrmw.end: -; GFX940-NEXT: ret float [[TMP6]] +; GFX942-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 +; GFX942-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX942: atomicrmw.start: +; GFX942-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX942-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) +; GFX942-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 +; GFX942-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX942-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX942-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX942-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX942-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX942: atomicrmw.end: +; GFX942-NEXT: ret float [[TMP6]] ; ; GFX10-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -3518,22 +3518,22 @@ define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP6]] ; -; GFX940-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 -; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] -; GFX940: atomicrmw.start: -; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] -; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) -; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 -; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 -; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] -; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 -; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 -; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float -; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] -; GFX940: atomicrmw.end: -; GFX940-NEXT: ret float [[TMP6]] +; GFX942-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 +; GFX942-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX942: atomicrmw.start: +; GFX942-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX942-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) +; GFX942-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 +; GFX942-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX942-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] +; GFX942-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX942-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX942-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX942-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX942: atomicrmw.end: +; GFX942-NEXT: ret float [[TMP6]] ; ; GFX10-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -3623,22 +3623,22 @@ define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP6]] ; -; GFX940-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 -; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] -; GFX940: atomicrmw.start: -; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] -; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) -; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 -; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 -; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 -; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 -; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float -; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] -; GFX940: atomicrmw.end: -; GFX940-NEXT: ret float [[TMP6]] +; GFX942-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 +; GFX942-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX942: atomicrmw.start: +; GFX942-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX942-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) +; GFX942-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 +; GFX942-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX942-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX942-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX942-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX942-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX942: atomicrmw.end: +; GFX942-NEXT: ret float [[TMP6]] ; ; GFX10-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -3752,22 +3752,22 @@ define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP6]] ; -; GFX940-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 -; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] -; GFX940: atomicrmw.start: -; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] -; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) -; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 -; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 -; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 -; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 -; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float -; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] -; GFX940: atomicrmw.end: -; GFX940-NEXT: ret float [[TMP6]] +; GFX942-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR]], align 4 +; GFX942-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX942: atomicrmw.start: +; GFX942-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX942-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) +; GFX942-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 +; GFX942-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX942-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX942-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX942-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX942-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX942: atomicrmw.end: +; GFX942-NEXT: ret float [[TMP6]] ; ; GFX10-LABEL: define float @test_atomicrmw_fmin_f32_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -3801,7 +3801,7 @@ attributes #1 = { "denormal-fp-mode-f32"="dynamic,dynamic" } ;. ; GFX90A: [[META0]] = !{} ;. -; GFX940: [[META0]] = !{} +; GFX942: [[META0]] = !{} ;. ; GFX10: [[META0]] = !{} ;. diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-agent.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-agent.ll index af9933fa9e726..1c2ae608711cc 100644 --- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-agent.ll +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-agent.ll @@ -3,7 +3,7 @@ ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX906 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX908 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX90A %s -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX940 %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX942 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX10 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX11 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX12 %s @@ -129,10 +129,10 @@ define double @test_atomicrmw_fadd_f64_global_agent(ptr addrspace(1) %ptr, doubl ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret double [[TMP5]] ; -; GFX940-LABEL: define double @test_atomicrmw_fadd_f64_global_agent( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8 -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: define double @test_atomicrmw_fadd_f64_global_agent( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8 +; GFX942-NEXT: ret double [[RES]] ; ; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_agent( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { @@ -246,10 +246,10 @@ define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memo ; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] ; GFX90A-NEXT: ret double [[TMP5]] ; -; GFX940-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] +; GFX942-NEXT: ret double [[RES]] ; ; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { @@ -375,10 +375,10 @@ define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_remote_memory(ptr ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret double [[TMP5]] ; -; GFX940-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret double [[RES]] ; ; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { @@ -492,10 +492,10 @@ define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memo ; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] ; GFX90A-NEXT: ret double [[TMP5]] ; -; GFX940-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret double [[RES]] ; ; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { @@ -609,10 +609,10 @@ define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memo ; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] ; GFX90A-NEXT: ret double [[TMP5]] ; -; GFX940-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_daz( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_daz( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret double [[RES]] ; ; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_daz( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { @@ -726,10 +726,10 @@ define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memo ; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] ; GFX90A-NEXT: ret double [[TMP5]] ; -; GFX940-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_dynamic( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_dynamic( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret double [[RES]] ; ; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_dynamic( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { @@ -1261,10 +1261,10 @@ define double @test_atomicrmw_fmax_f64_global_agent(ptr addrspace(1) %ptr, doubl ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret double [[TMP6]] ; -; GFX940-LABEL: define double @test_atomicrmw_fmax_f64_global_agent( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8 -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: define double @test_atomicrmw_fmax_f64_global_agent( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8 +; GFX942-NEXT: ret double [[RES]] ; ; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_agent( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { @@ -1378,10 +1378,10 @@ define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memo ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] ; GFX90A-NEXT: ret double [[RES]] ; -; GFX940-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] +; GFX942-NEXT: ret double [[RES]] ; ; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { @@ -1495,10 +1495,10 @@ define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory(ptr ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret double [[RES]] ; -; GFX940-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret double [[RES]] ; ; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { @@ -1612,10 +1612,10 @@ define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memo ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] ; GFX90A-NEXT: ret double [[RES]] ; -; GFX940-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret double [[RES]] ; ; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { @@ -1857,10 +1857,10 @@ define double @test_atomicrmw_fmin_f64_global_agent(ptr addrspace(1) %ptr, doubl ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret double [[TMP6]] ; -; GFX940-LABEL: define double @test_atomicrmw_fmin_f64_global_agent( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8 -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: define double @test_atomicrmw_fmin_f64_global_agent( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8 +; GFX942-NEXT: ret double [[RES]] ; ; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_agent( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { @@ -1974,10 +1974,10 @@ define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memo ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] ; GFX90A-NEXT: ret double [[RES]] ; -; GFX940-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] +; GFX942-NEXT: ret double [[RES]] ; ; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { @@ -2091,10 +2091,10 @@ define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory(ptr ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret double [[RES]] ; -; GFX940-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret double [[RES]] ; ; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { @@ -2208,10 +2208,10 @@ define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memo ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] ; GFX90A-NEXT: ret double [[RES]] ; -; GFX940-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret double [[RES]] ; ; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { @@ -2393,7 +2393,7 @@ attributes #1 = { "denormal-fp-mode"="dynamic,dynamic" } ;. ; GFX90A: [[META0]] = !{} ;. -; GFX940: [[META0]] = !{} +; GFX942: [[META0]] = !{} ;. ; GFX10: [[META0]] = !{} ;. diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-system.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-system.ll index d01dd2eb29538..72ecbf7708d86 100644 --- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-system.ll +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-system.ll @@ -3,7 +3,7 @@ ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX906 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX908 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX90A %s -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX940 %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX942 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX10 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX11 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX12 %s @@ -139,10 +139,10 @@ define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_mem ; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] ; GFX90A-NEXT: ret double [[TMP5]] ; -; GFX940-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] +; GFX942-NEXT: ret double [[RES]] ; ; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { @@ -268,10 +268,10 @@ define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_remote_memory(pt ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret double [[TMP5]] ; -; GFX940-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret double [[RES]] ; ; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { @@ -385,10 +385,10 @@ define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_mem ; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] ; GFX90A-NEXT: ret double [[TMP5]] ; -; GFX940-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret double [[RES]] ; ; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { @@ -502,10 +502,10 @@ define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_mem ; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] ; GFX90A-NEXT: ret double [[TMP5]] ; -; GFX940-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_daz( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_daz( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret double [[RES]] ; ; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_daz( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { @@ -619,10 +619,10 @@ define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_mem ; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] ; GFX90A-NEXT: ret double [[TMP5]] ; -; GFX940-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_dynamic( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_dynamic( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret double [[RES]] ; ; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_dynamic( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { @@ -1164,10 +1164,10 @@ define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_mem ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] ; GFX90A-NEXT: ret double [[RES]] ; -; GFX940-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] +; GFX942-NEXT: ret double [[RES]] ; ; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { @@ -1281,10 +1281,10 @@ define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_remote_memory(pt ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret double [[RES]] ; -; GFX940-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret double [[RES]] ; ; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { @@ -1398,10 +1398,10 @@ define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_mem ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] ; GFX90A-NEXT: ret double [[RES]] ; -; GFX940-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret double [[RES]] ; ; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { @@ -1653,10 +1653,10 @@ define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_mem ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] ; GFX90A-NEXT: ret double [[RES]] ; -; GFX940-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] +; GFX942-NEXT: ret double [[RES]] ; ; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { @@ -1770,10 +1770,10 @@ define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_remote_memory(pt ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret double [[RES]] ; -; GFX940-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret double [[RES]] ; ; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { @@ -1887,10 +1887,10 @@ define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_mem ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] ; GFX90A-NEXT: ret double [[RES]] ; -; GFX940-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret double [[RES]] ; ; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { @@ -2072,7 +2072,7 @@ attributes #1 = { "denormal-fp-mode"="dynamic,dynamic" } ;. ; GFX90A: [[META0]] = !{} ;. -; GFX940: [[META0]] = !{} +; GFX942: [[META0]] = !{} ;. ; GFX10: [[META0]] = !{} ;. diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i16.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i16.ll index b8196cfcc3510..bb6596fb39b56 100644 --- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i16.ll +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i16.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=atomic-expand %s | FileCheck -check-prefixes=CHECK,GCN,BASE %s -; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=atomic-expand %s | FileCheck -check-prefixes=CHECK,GCN,GFX940 %s +; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=atomic-expand %s | FileCheck -check-prefixes=CHECK,GCN,GFX942 %s ; RUN: opt -mtriple=r600-mesa-mesa3d -S -passes=atomic-expand %s | FileCheck -check-prefixes=CHECK,R600 %s target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" @@ -1379,5 +1379,5 @@ define i16 @test_atomicrmw_add_i16_buffer_fat_agent_align4(ptr addrspace(7) %ptr ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; BASE: {{.*}} ; GCN: {{.*}} -; GFX940: {{.*}} +; GFX942: {{.*}} ; R600: {{.*}} diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i32-agent.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i32-agent.ll index c3a0a4192ff17..a1007bacd522f 100644 --- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i32-agent.ll +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i32-agent.ll @@ -3,7 +3,7 @@ ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX906 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX908 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX90A %s -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX940 %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX942 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX10 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX11 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX12 %s @@ -649,7 +649,7 @@ define i32 @test_atomicrmw_udec_wrap_i32_global_agent__amdgpu_no_fine_grained_me ;. ; GFX90A: [[META0]] = !{} ;. -; GFX940: [[META0]] = !{} +; GFX942: [[META0]] = !{} ;. ; GFX10: [[META0]] = !{} ;. @@ -665,4 +665,4 @@ define i32 @test_atomicrmw_udec_wrap_i32_global_agent__amdgpu_no_fine_grained_me ; GFX906: {{.*}} ; GFX908: {{.*}} ; GFX90A: {{.*}} -; GFX940: {{.*}} +; GFX942: {{.*}} diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i32-system.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i32-system.ll index be3aaeb170673..08288848efd66 100644 --- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i32-system.ll +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i32-system.ll @@ -3,7 +3,7 @@ ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX906 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX908 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX90A %s -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX940 %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX942 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX10 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX11 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX12 %s @@ -809,7 +809,7 @@ define i32 @test_atomicrmw_udec_wrap_i32_global_system__amdgpu_no_fine_grained_m ;. ; GFX90A: [[META0]] = !{} ;. -; GFX940: [[META0]] = !{} +; GFX942: [[META0]] = !{} ;. ; GFX10: [[META0]] = !{} ;. @@ -825,4 +825,4 @@ define i32 @test_atomicrmw_udec_wrap_i32_global_system__amdgpu_no_fine_grained_m ; GFX906: {{.*}} ; GFX908: {{.*}} ; GFX90A: {{.*}} -; GFX940: {{.*}} +; GFX942: {{.*}} diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i64-agent.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i64-agent.ll index 77fe5e2aba913..7586a0af43c95 100644 --- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i64-agent.ll +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i64-agent.ll @@ -3,7 +3,7 @@ ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX906 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX908 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX90A %s -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX940 %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX942 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX10 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX11 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX12 %s @@ -649,7 +649,7 @@ define i64 @test_atomicrmw_udec_wrap_i64_global_agent__amdgpu_no_fine_grained_me ;. ; GFX90A: [[META0]] = !{} ;. -; GFX940: [[META0]] = !{} +; GFX942: [[META0]] = !{} ;. ; GFX10: [[META0]] = !{} ;. @@ -665,4 +665,4 @@ define i64 @test_atomicrmw_udec_wrap_i64_global_agent__amdgpu_no_fine_grained_me ; GFX906: {{.*}} ; GFX908: {{.*}} ; GFX90A: {{.*}} -; GFX940: {{.*}} +; GFX942: {{.*}} diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i64-system.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i64-system.ll index bd2aa846efb21..4f3979f25076e 100644 --- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i64-system.ll +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i64-system.ll @@ -3,7 +3,7 @@ ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX906 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX908 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX90A %s -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX940 %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX942 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX10 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX11 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX12 %s @@ -809,7 +809,7 @@ define i64 @test_atomicrmw_udec_wrap_i64_global_system__amdgpu_no_fine_grained_m ;. ; GFX90A: [[META0]] = !{} ;. -; GFX940: [[META0]] = !{} +; GFX942: [[META0]] = !{} ;. ; GFX10: [[META0]] = !{} ;. @@ -825,4 +825,4 @@ define i64 @test_atomicrmw_udec_wrap_i64_global_system__amdgpu_no_fine_grained_m ; GFX906: {{.*}} ; GFX908: {{.*}} ; GFX90A: {{.*}} -; GFX940: {{.*}} +; GFX942: {{.*}} diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll index db01f221f2911..2d2e895e7578d 100644 --- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -passes=atomic-expand %s | FileCheck -check-prefix=GFX908 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes=atomic-expand %s | FileCheck -check-prefix=GFX90A %s -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -passes=atomic-expand %s | FileCheck -check-prefix=GFX940 %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=atomic-expand %s | FileCheck -check-prefix=GFX942 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=atomic-expand %s | FileCheck -check-prefix=GFX1100 %s define float @syncscope_system(ptr %addr, float %val) { @@ -47,9 +47,9 @@ define float @syncscope_system(ptr %addr, float %val) { ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[RES]] ; -; GFX940-LABEL: @syncscope_system( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[ADDR:%.*]], float [[VAL:%.*]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret float [[RES]] +; GFX942-LABEL: @syncscope_system( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[ADDR:%.*]], float [[VAL:%.*]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret float [[RES]] ; ; GFX1100-LABEL: @syncscope_system( ; GFX1100-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[ADDR:%.*]], float [[VAL:%.*]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]], !amdgpu.ignore.denormal.mode [[META0]] @@ -117,9 +117,9 @@ define float @syncscope_workgroup_rtn(ptr %addr, float %val) { ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[RES]] ; -; GFX940-LABEL: @syncscope_workgroup_rtn( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[ADDR:%.*]], float [[VAL:%.*]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret float [[RES]] +; GFX942-LABEL: @syncscope_workgroup_rtn( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[ADDR:%.*]], float [[VAL:%.*]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret float [[RES]] ; ; GFX1100-LABEL: @syncscope_workgroup_rtn( ; GFX1100-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[ADDR:%.*]], float [[VAL:%.*]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] @@ -195,9 +195,9 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) { ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: @syncscope_workgroup_nortn( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[ADDR:%.*]], float [[VAL:%.*]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret void +; GFX942-LABEL: @syncscope_workgroup_nortn( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[ADDR:%.*]], float [[VAL:%.*]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret void ; ; GFX1100-LABEL: @syncscope_workgroup_nortn( ; GFX1100-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[ADDR:%.*]], float [[VAL:%.*]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] @@ -255,9 +255,9 @@ define float @no_unsafe(ptr %addr, float %val) { ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP5]] ; -; GFX940-LABEL: @no_unsafe( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[ADDR:%.*]], float [[VAL:%.*]] syncscope("workgroup") seq_cst, align 4 -; GFX940-NEXT: ret float [[RES]] +; GFX942-LABEL: @no_unsafe( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[ADDR:%.*]], float [[VAL:%.*]] syncscope("workgroup") seq_cst, align 4 +; GFX942-NEXT: ret float [[RES]] ; ; GFX1100-LABEL: @no_unsafe( ; GFX1100-NEXT: [[TMP1:%.*]] = load float, ptr [[ADDR:%.*]], align 4 @@ -337,9 +337,9 @@ define float @flat_atomicrmw_fadd_f32__align32(ptr %addr, float %val) { ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[RES]] ; -; GFX940-LABEL: @flat_atomicrmw_fadd_f32__align32( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[ADDR:%.*]], float [[VAL:%.*]] seq_cst, align 32, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret float [[RES]] +; GFX942-LABEL: @flat_atomicrmw_fadd_f32__align32( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[ADDR:%.*]], float [[VAL:%.*]] seq_cst, align 32, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret float [[RES]] ; ; GFX1100-LABEL: @flat_atomicrmw_fadd_f32__align32( ; GFX1100-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[ADDR:%.*]], float [[VAL:%.*]] seq_cst, align 32, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd.ll index 84d9a64efa0f7..4aee397a0152c 100644 --- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd.ll +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd.ll @@ -3,7 +3,7 @@ ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=atomic-expand %s | FileCheck -check-prefixes=ALL,GFX9 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -passes=atomic-expand %s | FileCheck -check-prefixes=ALL,GFX908 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes=atomic-expand %s | FileCheck -check-prefixes=ALL,GFX90A %s -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -passes=atomic-expand %s | FileCheck -check-prefixes=ALL,GFX940 %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=atomic-expand %s | FileCheck -check-prefixes=ALL,GFX942 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=atomic-expand %s | FileCheck -check-prefixes=ALL,GFX11 %s define void @test_atomicrmw_fadd_f32_global_no_use_unsafe(ptr addrspace(1) %ptr, float %value) #3 { @@ -71,9 +71,9 @@ define void @test_atomicrmw_fadd_f32_global_no_use_unsafe(ptr addrspace(1) %ptr, ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: @test_atomicrmw_fadd_f32_global_no_use_unsafe( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4 -; GFX940-NEXT: ret void +; GFX942-LABEL: @test_atomicrmw_fadd_f32_global_no_use_unsafe( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4 +; GFX942-NEXT: ret void ; ; GFX11-LABEL: @test_atomicrmw_fadd_f32_global_no_use_unsafe( ; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4 @@ -160,9 +160,9 @@ define void @test_atomicrmw_fadd_f32_buffer_fat_ptr_no_use_unsafe(ptr addrspace( ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: @test_atomicrmw_fadd_f32_buffer_fat_ptr_no_use_unsafe( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(7) [[PTR:%.*]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4 -; GFX940-NEXT: ret void +; GFX942-LABEL: @test_atomicrmw_fadd_f32_buffer_fat_ptr_no_use_unsafe( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(7) [[PTR:%.*]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4 +; GFX942-NEXT: ret void ; ; GFX11-LABEL: @test_atomicrmw_fadd_f32_buffer_fat_ptr_no_use_unsafe( ; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(7) [[PTR:%.*]], align 4 @@ -249,9 +249,9 @@ define void @test_atomicrmw_fadd_f32_as999_no_use_unsafe(ptr addrspace(999) %ptr ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: @test_atomicrmw_fadd_f32_as999_no_use_unsafe( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(999) [[PTR:%.*]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4 -; GFX940-NEXT: ret void +; GFX942-LABEL: @test_atomicrmw_fadd_f32_as999_no_use_unsafe( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(999) [[PTR:%.*]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4 +; GFX942-NEXT: ret void ; ; GFX11-LABEL: @test_atomicrmw_fadd_f32_as999_no_use_unsafe( ; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(999) [[PTR:%.*]], align 4 @@ -326,9 +326,9 @@ define float @test_atomicrmw_fadd_f32_global_unsafe(ptr addrspace(1) %ptr, float ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]] ; GFX90A-NEXT: ret float [[RES]] ; -; GFX940-LABEL: @test_atomicrmw_fadd_f32_global_unsafe( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]] -; GFX940-NEXT: ret float [[RES]] +; GFX942-LABEL: @test_atomicrmw_fadd_f32_global_unsafe( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]] +; GFX942-NEXT: ret float [[RES]] ; ; GFX11-LABEL: @test_atomicrmw_fadd_f32_global_unsafe( ; GFX11-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]] @@ -391,9 +391,9 @@ define float @test_atomicrmw_fadd_f32_buffer_fat_ptr_unsafe(ptr addrspace(7) %pt ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(7) [[PTR:%.*]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META0]] ; GFX90A-NEXT: ret float [[RES]] ; -; GFX940-LABEL: @test_atomicrmw_fadd_f32_buffer_fat_ptr_unsafe( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(7) [[PTR:%.*]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META0]] -; GFX940-NEXT: ret float [[RES]] +; GFX942-LABEL: @test_atomicrmw_fadd_f32_buffer_fat_ptr_unsafe( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(7) [[PTR:%.*]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META0]] +; GFX942-NEXT: ret float [[RES]] ; ; GFX11-LABEL: @test_atomicrmw_fadd_f32_buffer_fat_ptr_unsafe( ; GFX11-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(7) [[PTR:%.*]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META0]] @@ -456,9 +456,9 @@ define float @test_atomicrmw_fadd_f32_as999_unsafe(ptr addrspace(999) %ptr, floa ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(999) [[PTR:%.*]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META0]] ; GFX90A-NEXT: ret float [[RES]] ; -; GFX940-LABEL: @test_atomicrmw_fadd_f32_as999_unsafe( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(999) [[PTR:%.*]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META0]] -; GFX940-NEXT: ret float [[RES]] +; GFX942-LABEL: @test_atomicrmw_fadd_f32_as999_unsafe( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(999) [[PTR:%.*]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META0]] +; GFX942-NEXT: ret float [[RES]] ; ; GFX11-LABEL: @test_atomicrmw_fadd_f32_as999_unsafe( ; GFX11-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(999) [[PTR:%.*]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META0]] @@ -521,9 +521,9 @@ define double @test_atomicrmw_fadd_f64_global_unsafe(ptr addrspace(1) %ptr, doub ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]] syncscope("wavefront") monotonic, align 8, !amdgpu.no.fine.grained.memory [[META0]] ; GFX90A-NEXT: ret double [[RES]] ; -; GFX940-LABEL: @test_atomicrmw_fadd_f64_global_unsafe( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]] syncscope("wavefront") monotonic, align 8, !amdgpu.no.fine.grained.memory [[META0]] -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: @test_atomicrmw_fadd_f64_global_unsafe( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]] syncscope("wavefront") monotonic, align 8, !amdgpu.no.fine.grained.memory [[META0]] +; GFX942-NEXT: ret double [[RES]] ; ; GFX11-LABEL: @test_atomicrmw_fadd_f64_global_unsafe( ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR:%.*]], align 8 @@ -620,9 +620,9 @@ define float @test_atomicrmw_fadd_f32_flat_unsafe(ptr %ptr, float %value) #3 { ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[RES]] ; -; GFX940-LABEL: @test_atomicrmw_fadd_f32_flat_unsafe( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[PTR:%.*]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4, !noalias.addrspace [[META1:![0-9]+]], !amdgpu.no.fine.grained.memory [[META0]] -; GFX940-NEXT: ret float [[RES]] +; GFX942-LABEL: @test_atomicrmw_fadd_f32_flat_unsafe( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[PTR:%.*]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4, !noalias.addrspace [[META1:![0-9]+]], !amdgpu.no.fine.grained.memory [[META0]] +; GFX942-NEXT: ret float [[RES]] ; ; GFX11-LABEL: @test_atomicrmw_fadd_f32_flat_unsafe( ; GFX11-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[PTR:%.*]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4, !noalias.addrspace [[META1:![0-9]+]], !amdgpu.no.fine.grained.memory [[META0]] @@ -685,9 +685,9 @@ define double @test_atomicrmw_fadd_f64_flat_unsafe__noprivate(ptr %ptr, double % ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[PTR:%.*]], double [[VALUE:%.*]] syncscope("wavefront") monotonic, align 8, !noalias.addrspace [[META1]], !amdgpu.no.fine.grained.memory [[META0]] ; GFX90A-NEXT: ret double [[RES]] ; -; GFX940-LABEL: @test_atomicrmw_fadd_f64_flat_unsafe__noprivate( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[PTR:%.*]], double [[VALUE:%.*]] syncscope("wavefront") monotonic, align 8, !noalias.addrspace [[META1]], !amdgpu.no.fine.grained.memory [[META0]] -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: @test_atomicrmw_fadd_f64_flat_unsafe__noprivate( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[PTR:%.*]], double [[VALUE:%.*]] syncscope("wavefront") monotonic, align 8, !noalias.addrspace [[META1]], !amdgpu.no.fine.grained.memory [[META0]] +; GFX942-NEXT: ret double [[RES]] ; ; GFX11-LABEL: @test_atomicrmw_fadd_f64_flat_unsafe__noprivate( ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr [[PTR:%.*]], align 8 @@ -818,23 +818,23 @@ define double @test_atomicrmw_fadd_f64_flat_unsafe(ptr %ptr, double %value) #3 { ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret double [[RES]] ; -; GFX940-LABEL: @test_atomicrmw_fadd_f64_flat_unsafe( -; GFX940-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[PTR:%.*]]) -; GFX940-NEXT: br i1 [[IS_PRIVATE]], label [[ATOMICRMW_PRIVATE:%.*]], label [[ATOMICRMW_GLOBAL:%.*]] -; GFX940: atomicrmw.private: -; GFX940-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(5) -; GFX940-NEXT: [[LOADED_PRIVATE:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8 -; GFX940-NEXT: [[NEW:%.*]] = fadd double [[LOADED_PRIVATE]], [[VALUE:%.*]] -; GFX940-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP1]], align 8 -; GFX940-NEXT: br label [[ATOMICRMW_PHI:%.*]] -; GFX940: atomicrmw.global: -; GFX940-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[PTR]], double [[VALUE]] syncscope("wavefront") monotonic, align 8, !noalias.addrspace [[META1]], !amdgpu.no.fine.grained.memory [[META0]] -; GFX940-NEXT: br label [[ATOMICRMW_PHI]] -; GFX940: atomicrmw.phi: -; GFX940-NEXT: [[RES:%.*]] = phi double [ [[LOADED_PRIVATE]], [[ATOMICRMW_PRIVATE]] ], [ [[TMP2]], [[ATOMICRMW_GLOBAL]] ] -; GFX940-NEXT: br label [[ATOMICRMW_END:%.*]] -; GFX940: atomicrmw.end: -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: @test_atomicrmw_fadd_f64_flat_unsafe( +; GFX942-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[PTR:%.*]]) +; GFX942-NEXT: br i1 [[IS_PRIVATE]], label [[ATOMICRMW_PRIVATE:%.*]], label [[ATOMICRMW_GLOBAL:%.*]] +; GFX942: atomicrmw.private: +; GFX942-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(5) +; GFX942-NEXT: [[LOADED_PRIVATE:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8 +; GFX942-NEXT: [[NEW:%.*]] = fadd double [[LOADED_PRIVATE]], [[VALUE:%.*]] +; GFX942-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP1]], align 8 +; GFX942-NEXT: br label [[ATOMICRMW_PHI:%.*]] +; GFX942: atomicrmw.global: +; GFX942-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[PTR]], double [[VALUE]] syncscope("wavefront") monotonic, align 8, !noalias.addrspace [[META1]], !amdgpu.no.fine.grained.memory [[META0]] +; GFX942-NEXT: br label [[ATOMICRMW_PHI]] +; GFX942: atomicrmw.phi: +; GFX942-NEXT: [[RES:%.*]] = phi double [ [[LOADED_PRIVATE]], [[ATOMICRMW_PRIVATE]] ], [ [[TMP2]], [[ATOMICRMW_GLOBAL]] ] +; GFX942-NEXT: br label [[ATOMICRMW_END:%.*]] +; GFX942: atomicrmw.end: +; GFX942-NEXT: ret double [[RES]] ; ; GFX11-LABEL: @test_atomicrmw_fadd_f64_flat_unsafe( ; GFX11-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[PTR:%.*]]) @@ -1040,9 +1040,9 @@ define void @test_atomicrmw_fadd_f32_global_no_use_denorm_flush(ptr addrspace(1) ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: @test_atomicrmw_fadd_f32_global_no_use_denorm_flush( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] -; GFX940-NEXT: ret void +; GFX942-LABEL: @test_atomicrmw_fadd_f32_global_no_use_denorm_flush( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] +; GFX942-NEXT: ret void ; ; GFX11-LABEL: @test_atomicrmw_fadd_f32_global_no_use_denorm_flush( ; GFX11-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] @@ -1081,9 +1081,9 @@ define float @test_atomicrmw_fadd_f32_local(ptr addrspace(3) %ptr, float %value) ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], float [[VALUE:%.*]] seq_cst, align 4 ; GFX90A-NEXT: ret float [[RES]] ; -; GFX940-LABEL: @test_atomicrmw_fadd_f32_local( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], float [[VALUE:%.*]] seq_cst, align 4 -; GFX940-NEXT: ret float [[RES]] +; GFX942-LABEL: @test_atomicrmw_fadd_f32_local( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], float [[VALUE:%.*]] seq_cst, align 4 +; GFX942-NEXT: ret float [[RES]] ; ; GFX11-LABEL: @test_atomicrmw_fadd_f32_local( ; GFX11-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], float [[VALUE:%.*]] seq_cst, align 4 @@ -1356,9 +1356,9 @@ define double @test_atomicrmw_fadd_f64_local(ptr addrspace(3) %ptr, double %valu ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], double [[VALUE:%.*]] seq_cst, align 8 ; GFX90A-NEXT: ret double [[RES]] ; -; GFX940-LABEL: @test_atomicrmw_fadd_f64_local( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], double [[VALUE:%.*]] seq_cst, align 8 -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: @test_atomicrmw_fadd_f64_local( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], double [[VALUE:%.*]] seq_cst, align 8 +; GFX942-NEXT: ret double [[RES]] ; ; GFX11-LABEL: @test_atomicrmw_fadd_f64_local( ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(3) [[PTR:%.*]], align 8 @@ -1445,9 +1445,9 @@ define float @test_atomicrmw_fadd_f32_global_agent(ptr addrspace(1) %ptr, float ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret float [[TMP5]] ; -; GFX940-LABEL: @test_atomicrmw_fadd_f32_global_agent( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]] syncscope("agent") monotonic, align 4 -; GFX940-NEXT: ret float [[RES]] +; GFX942-LABEL: @test_atomicrmw_fadd_f32_global_agent( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]] syncscope("agent") monotonic, align 4 +; GFX942-NEXT: ret float [[RES]] ; ; GFX11-LABEL: @test_atomicrmw_fadd_f32_global_agent( ; GFX11-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[PTR:%.*]], align 4 @@ -1531,9 +1531,9 @@ define void @test_atomicrmw_fadd_f32_global_no_use_unsafe_strictfp(ptr addrspace ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META0]] ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: @test_atomicrmw_fadd_f32_global_no_use_unsafe_strictfp( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META0]] -; GFX940-NEXT: ret void +; GFX942-LABEL: @test_atomicrmw_fadd_f32_global_no_use_unsafe_strictfp( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META0]] +; GFX942-NEXT: ret void ; ; GFX11-LABEL: @test_atomicrmw_fadd_f32_global_no_use_unsafe_strictfp( ; GFX11-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META0]] @@ -1596,9 +1596,9 @@ define double @test_atomicrmw_fadd_f64_global_unsafe_strictfp(ptr addrspace(1) % ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]] syncscope("wavefront") monotonic, align 8, !amdgpu.no.fine.grained.memory [[META0]] ; GFX90A-NEXT: ret double [[RES]] ; -; GFX940-LABEL: @test_atomicrmw_fadd_f64_global_unsafe_strictfp( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]] syncscope("wavefront") monotonic, align 8, !amdgpu.no.fine.grained.memory [[META0]] -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: @test_atomicrmw_fadd_f64_global_unsafe_strictfp( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]] syncscope("wavefront") monotonic, align 8, !amdgpu.no.fine.grained.memory [[META0]] +; GFX942-NEXT: ret double [[RES]] ; ; GFX11-LABEL: @test_atomicrmw_fadd_f64_global_unsafe_strictfp( ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR:%.*]], align 8 @@ -1649,9 +1649,9 @@ define float @test_atomicrmw_fadd_f32_local_strictfp(ptr addrspace(3) %ptr, floa ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], float [[VALUE:%.*]] seq_cst, align 4 ; GFX90A-NEXT: ret float [[RES]] ; -; GFX940-LABEL: @test_atomicrmw_fadd_f32_local_strictfp( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], float [[VALUE:%.*]] seq_cst, align 4 -; GFX940-NEXT: ret float [[RES]] +; GFX942-LABEL: @test_atomicrmw_fadd_f32_local_strictfp( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], float [[VALUE:%.*]] seq_cst, align 4 +; GFX942-NEXT: ret float [[RES]] ; ; GFX11-LABEL: @test_atomicrmw_fadd_f32_local_strictfp( ; GFX11-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], float [[VALUE:%.*]] seq_cst, align 4 @@ -2244,9 +2244,9 @@ define void @test_atomicrmw_fadd_f32_local_noret(ptr addrspace(3) %ptr, float %v ; GFX90A-NEXT: [[UNUSED:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], float [[VALUE:%.*]] monotonic, align 4 ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: @test_atomicrmw_fadd_f32_local_noret( -; GFX940-NEXT: [[UNUSED:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], float [[VALUE:%.*]] monotonic, align 4 -; GFX940-NEXT: ret void +; GFX942-LABEL: @test_atomicrmw_fadd_f32_local_noret( +; GFX942-NEXT: [[UNUSED:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], float [[VALUE:%.*]] monotonic, align 4 +; GFX942-NEXT: ret void ; ; GFX11-LABEL: @test_atomicrmw_fadd_f32_local_noret( ; GFX11-NEXT: [[UNUSED:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], float [[VALUE:%.*]] monotonic, align 4 @@ -2285,9 +2285,9 @@ define float @test_atomicrmw_fadd_f32_local_ret(ptr addrspace(3) %ptr, float %va ; GFX90A-NEXT: [[RET:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], float [[VALUE:%.*]] monotonic, align 4 ; GFX90A-NEXT: ret float [[RET]] ; -; GFX940-LABEL: @test_atomicrmw_fadd_f32_local_ret( -; GFX940-NEXT: [[RET:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], float [[VALUE:%.*]] monotonic, align 4 -; GFX940-NEXT: ret float [[RET]] +; GFX942-LABEL: @test_atomicrmw_fadd_f32_local_ret( +; GFX942-NEXT: [[RET:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], float [[VALUE:%.*]] monotonic, align 4 +; GFX942-NEXT: ret float [[RET]] ; ; GFX11-LABEL: @test_atomicrmw_fadd_f32_local_ret( ; GFX11-NEXT: [[RET:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], float [[VALUE:%.*]] monotonic, align 4 @@ -2326,9 +2326,9 @@ define void @test_atomicrmw_fadd_f32_local_noret__amdgpu_ignore_denormal_mode(pt ; GFX90A-NEXT: [[UNUSED:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], float [[VALUE:%.*]] monotonic, align 4, !amdgpu.ignore.denormal.mode [[META0]] ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: @test_atomicrmw_fadd_f32_local_noret__amdgpu_ignore_denormal_mode( -; GFX940-NEXT: [[UNUSED:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], float [[VALUE:%.*]] monotonic, align 4, !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret void +; GFX942-LABEL: @test_atomicrmw_fadd_f32_local_noret__amdgpu_ignore_denormal_mode( +; GFX942-NEXT: [[UNUSED:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], float [[VALUE:%.*]] monotonic, align 4, !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret void ; ; GFX11-LABEL: @test_atomicrmw_fadd_f32_local_noret__amdgpu_ignore_denormal_mode( ; GFX11-NEXT: [[UNUSED:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], float [[VALUE:%.*]] monotonic, align 4, !amdgpu.ignore.denormal.mode [[META0]] @@ -2367,9 +2367,9 @@ define float @test_atomicrmw_fadd_f32_local_ret__amdgpu_ignore_denormal_mode(ptr ; GFX90A-NEXT: [[RET:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], float [[VALUE:%.*]] monotonic, align 4, !amdgpu.ignore.denormal.mode [[META0]] ; GFX90A-NEXT: ret float [[RET]] ; -; GFX940-LABEL: @test_atomicrmw_fadd_f32_local_ret__amdgpu_ignore_denormal_mode( -; GFX940-NEXT: [[RET:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], float [[VALUE:%.*]] monotonic, align 4, !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret float [[RET]] +; GFX942-LABEL: @test_atomicrmw_fadd_f32_local_ret__amdgpu_ignore_denormal_mode( +; GFX942-NEXT: [[RET:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], float [[VALUE:%.*]] monotonic, align 4, !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret float [[RET]] ; ; GFX11-LABEL: @test_atomicrmw_fadd_f32_local_ret__amdgpu_ignore_denormal_mode( ; GFX11-NEXT: [[RET:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], float [[VALUE:%.*]] monotonic, align 4, !amdgpu.ignore.denormal.mode [[META0]] @@ -2516,9 +2516,9 @@ define void @test_atomicrmw_fadd_f64_dyndenorm_local_system_noret(ptr addrspace( ; GFX90A-NEXT: [[UNUSED:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], double [[VALUE:%.*]] monotonic, align 8 ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: @test_atomicrmw_fadd_f64_dyndenorm_local_system_noret( -; GFX940-NEXT: [[UNUSED:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], double [[VALUE:%.*]] monotonic, align 8 -; GFX940-NEXT: ret void +; GFX942-LABEL: @test_atomicrmw_fadd_f64_dyndenorm_local_system_noret( +; GFX942-NEXT: [[UNUSED:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], double [[VALUE:%.*]] monotonic, align 8 +; GFX942-NEXT: ret void ; ; GFX11-LABEL: @test_atomicrmw_fadd_f64_dyndenorm_local_system_noret( ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(3) [[PTR:%.*]], align 8 @@ -2593,9 +2593,9 @@ define double @test_atomicrmw_fadd_f64_dyndenorm_local_system_ret(ptr addrspace( ; GFX90A-NEXT: [[RET:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], double [[VALUE:%.*]] monotonic, align 8 ; GFX90A-NEXT: ret double [[RET]] ; -; GFX940-LABEL: @test_atomicrmw_fadd_f64_dyndenorm_local_system_ret( -; GFX940-NEXT: [[RET:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], double [[VALUE:%.*]] monotonic, align 8 -; GFX940-NEXT: ret double [[RET]] +; GFX942-LABEL: @test_atomicrmw_fadd_f64_dyndenorm_local_system_ret( +; GFX942-NEXT: [[RET:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], double [[VALUE:%.*]] monotonic, align 8 +; GFX942-NEXT: ret double [[RET]] ; ; GFX11-LABEL: @test_atomicrmw_fadd_f64_dyndenorm_local_system_ret( ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(3) [[PTR:%.*]], align 8 @@ -2670,9 +2670,9 @@ define void @test_atomicrmw_fadd_f64_dyndenorm_local_system_noret__amdgpu_ignore ; GFX90A-NEXT: [[UNUSED:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], double [[VALUE:%.*]] monotonic, align 8, !amdgpu.ignore.denormal.mode [[META0]] ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: @test_atomicrmw_fadd_f64_dyndenorm_local_system_noret__amdgpu_ignore_denormal_mode( -; GFX940-NEXT: [[UNUSED:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], double [[VALUE:%.*]] monotonic, align 8, !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret void +; GFX942-LABEL: @test_atomicrmw_fadd_f64_dyndenorm_local_system_noret__amdgpu_ignore_denormal_mode( +; GFX942-NEXT: [[UNUSED:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], double [[VALUE:%.*]] monotonic, align 8, !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret void ; ; GFX11-LABEL: @test_atomicrmw_fadd_f64_dyndenorm_local_system_noret__amdgpu_ignore_denormal_mode( ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(3) [[PTR:%.*]], align 8 @@ -2747,9 +2747,9 @@ define double @test_atomicrmw_fadd_f64_dyndenorm_local_system_ret__amdgpu_ignore ; GFX90A-NEXT: [[RET:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], double [[VALUE:%.*]] monotonic, align 8, !amdgpu.ignore.denormal.mode [[META0]] ; GFX90A-NEXT: ret double [[RET]] ; -; GFX940-LABEL: @test_atomicrmw_fadd_f64_dyndenorm_local_system_ret__amdgpu_ignore_denormal_mode( -; GFX940-NEXT: [[RET:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], double [[VALUE:%.*]] monotonic, align 8, !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret double [[RET]] +; GFX942-LABEL: @test_atomicrmw_fadd_f64_dyndenorm_local_system_ret__amdgpu_ignore_denormal_mode( +; GFX942-NEXT: [[RET:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], double [[VALUE:%.*]] monotonic, align 8, !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret double [[RET]] ; ; GFX11-LABEL: @test_atomicrmw_fadd_f64_dyndenorm_local_system_ret__amdgpu_ignore_denormal_mode( ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(3) [[PTR:%.*]], align 8 @@ -3031,9 +3031,9 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_flat_agent(ptr %ptr, <2 x half> %va ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x half> [[TMP5]] ; -; GFX940-LABEL: @test_atomicrmw_fadd_v2f16_flat_agent( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[PTR:%.*]], <2 x half> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4 -; GFX940-NEXT: ret <2 x half> [[RES]] +; GFX942-LABEL: @test_atomicrmw_fadd_v2f16_flat_agent( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[PTR:%.*]], <2 x half> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4 +; GFX942-NEXT: ret <2 x half> [[RES]] ; ; GFX11-LABEL: @test_atomicrmw_fadd_v2f16_flat_agent( ; GFX11-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[PTR:%.*]], align 4 @@ -3120,9 +3120,9 @@ define void @test_atomicrmw_fadd_v2f16_flat_agent_noret(ptr %ptr, <2 x half> %va ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: @test_atomicrmw_fadd_v2f16_flat_agent_noret( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[PTR:%.*]], <2 x half> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4 -; GFX940-NEXT: ret void +; GFX942-LABEL: @test_atomicrmw_fadd_v2f16_flat_agent_noret( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[PTR:%.*]], <2 x half> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4 +; GFX942-NEXT: ret void ; ; GFX11-LABEL: @test_atomicrmw_fadd_v2f16_flat_agent_noret( ; GFX11-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[PTR:%.*]], align 4 @@ -3209,9 +3209,9 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent(ptr addrspace(1) %ptr, ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x half> [[TMP5]] ; -; GFX940-LABEL: @test_atomicrmw_fadd_v2f16_global_agent( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4 -; GFX940-NEXT: ret <2 x half> [[RES]] +; GFX942-LABEL: @test_atomicrmw_fadd_v2f16_global_agent( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4 +; GFX942-NEXT: ret <2 x half> [[RES]] ; ; GFX11-LABEL: @test_atomicrmw_fadd_v2f16_global_agent( ; GFX11-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR:%.*]], align 4 @@ -3298,9 +3298,9 @@ define void @test_atomicrmw_fadd_v2f16_flat_global_noret(ptr addrspace(1) %ptr, ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: @test_atomicrmw_fadd_v2f16_flat_global_noret( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4 -; GFX940-NEXT: ret void +; GFX942-LABEL: @test_atomicrmw_fadd_v2f16_flat_global_noret( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4 +; GFX942-NEXT: ret void ; ; GFX11-LABEL: @test_atomicrmw_fadd_v2f16_flat_global_noret( ; GFX11-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR:%.*]], align 4 @@ -3387,9 +3387,9 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_local_agent(ptr addrspace(3) %ptr, ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x half> [[TMP5]] ; -; GFX940-LABEL: @test_atomicrmw_fadd_v2f16_local_agent( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], <2 x half> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4 -; GFX940-NEXT: ret <2 x half> [[RES]] +; GFX942-LABEL: @test_atomicrmw_fadd_v2f16_local_agent( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], <2 x half> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4 +; GFX942-NEXT: ret <2 x half> [[RES]] ; ; GFX11-LABEL: @test_atomicrmw_fadd_v2f16_local_agent( ; GFX11-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr addrspace(3) [[PTR:%.*]], align 4 @@ -3476,9 +3476,9 @@ define void @test_atomicrmw_fadd_v2f16_flat_local_noret(ptr addrspace(3) %ptr, < ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: @test_atomicrmw_fadd_v2f16_flat_local_noret( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], <2 x half> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4 -; GFX940-NEXT: ret void +; GFX942-LABEL: @test_atomicrmw_fadd_v2f16_flat_local_noret( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], <2 x half> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4 +; GFX942-NEXT: ret void ; ; GFX11-LABEL: @test_atomicrmw_fadd_v2f16_flat_local_noret( ; GFX11-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr addrspace(3) [[PTR:%.*]], align 4 @@ -3565,9 +3565,9 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_flat_agent(ptr %ptr, <2 x bfloat ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x bfloat> [[TMP5]] ; -; GFX940-LABEL: @test_atomicrmw_fadd_v2bf16_flat_agent( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4 -; GFX940-NEXT: ret <2 x bfloat> [[RES]] +; GFX942-LABEL: @test_atomicrmw_fadd_v2bf16_flat_agent( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4 +; GFX942-NEXT: ret <2 x bfloat> [[RES]] ; ; GFX11-LABEL: @test_atomicrmw_fadd_v2bf16_flat_agent( ; GFX11-NEXT: [[TMP1:%.*]] = load <2 x bfloat>, ptr [[PTR:%.*]], align 4 @@ -3654,9 +3654,9 @@ define void @test_atomicrmw_fadd_v2bf16_flat_agent_noret(ptr %ptr, <2 x bfloat> ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: @test_atomicrmw_fadd_v2bf16_flat_agent_noret( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4 -; GFX940-NEXT: ret void +; GFX942-LABEL: @test_atomicrmw_fadd_v2bf16_flat_agent_noret( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4 +; GFX942-NEXT: ret void ; ; GFX11-LABEL: @test_atomicrmw_fadd_v2bf16_flat_agent_noret( ; GFX11-NEXT: [[TMP1:%.*]] = load <2 x bfloat>, ptr [[PTR:%.*]], align 4 @@ -3743,9 +3743,9 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent(ptr addrspace(1) %p ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x bfloat> [[TMP5]] ; -; GFX940-LABEL: @test_atomicrmw_fadd_v2bf16_global_agent( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4 -; GFX940-NEXT: ret <2 x bfloat> [[RES]] +; GFX942-LABEL: @test_atomicrmw_fadd_v2bf16_global_agent( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4 +; GFX942-NEXT: ret <2 x bfloat> [[RES]] ; ; GFX11-LABEL: @test_atomicrmw_fadd_v2bf16_global_agent( ; GFX11-NEXT: [[TMP1:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR:%.*]], align 4 @@ -3832,9 +3832,9 @@ define void @test_atomicrmw_fadd_v2bf16_flat_global_noret(ptr addrspace(1) %ptr, ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: @test_atomicrmw_fadd_v2bf16_flat_global_noret( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4 -; GFX940-NEXT: ret void +; GFX942-LABEL: @test_atomicrmw_fadd_v2bf16_flat_global_noret( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4 +; GFX942-NEXT: ret void ; ; GFX11-LABEL: @test_atomicrmw_fadd_v2bf16_flat_global_noret( ; GFX11-NEXT: [[TMP1:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR:%.*]], align 4 @@ -3921,9 +3921,9 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_local_agent(ptr addrspace(3) %pt ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x bfloat> [[TMP5]] ; -; GFX940-LABEL: @test_atomicrmw_fadd_v2bf16_local_agent( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4 -; GFX940-NEXT: ret <2 x bfloat> [[RES]] +; GFX942-LABEL: @test_atomicrmw_fadd_v2bf16_local_agent( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4 +; GFX942-NEXT: ret <2 x bfloat> [[RES]] ; ; GFX11-LABEL: @test_atomicrmw_fadd_v2bf16_local_agent( ; GFX11-NEXT: [[TMP1:%.*]] = load <2 x bfloat>, ptr addrspace(3) [[PTR:%.*]], align 4 @@ -4010,9 +4010,9 @@ define void @test_atomicrmw_fadd_v2bf16_flat_local_noret(ptr addrspace(3) %ptr, ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: @test_atomicrmw_fadd_v2bf16_flat_local_noret( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4 -; GFX940-NEXT: ret void +; GFX942-LABEL: @test_atomicrmw_fadd_v2bf16_flat_local_noret( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(3) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4 +; GFX942-NEXT: ret void ; ; GFX11-LABEL: @test_atomicrmw_fadd_v2bf16_flat_local_noret( ; GFX11-NEXT: [[TMP1:%.*]] = load <2 x bfloat>, ptr addrspace(3) [[PTR:%.*]], align 4 @@ -4099,9 +4099,9 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_flat_agent__unsafe(ptr %ptr, <2 x h ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x half> [[TMP5]] ; -; GFX940-LABEL: @test_atomicrmw_fadd_v2f16_flat_agent__unsafe( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[PTR:%.*]], <2 x half> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4 -; GFX940-NEXT: ret <2 x half> [[RES]] +; GFX942-LABEL: @test_atomicrmw_fadd_v2f16_flat_agent__unsafe( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[PTR:%.*]], <2 x half> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4 +; GFX942-NEXT: ret <2 x half> [[RES]] ; ; GFX11-LABEL: @test_atomicrmw_fadd_v2f16_flat_agent__unsafe( ; GFX11-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[PTR:%.*]], align 4 @@ -4188,9 +4188,9 @@ define void @test_atomicrmw_fadd_v2f16_flat_agent_noret__unsafe(ptr %ptr, <2 x h ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: @test_atomicrmw_fadd_v2f16_flat_agent_noret__unsafe( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[PTR:%.*]], <2 x half> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4 -; GFX940-NEXT: ret void +; GFX942-LABEL: @test_atomicrmw_fadd_v2f16_flat_agent_noret__unsafe( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[PTR:%.*]], <2 x half> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4 +; GFX942-NEXT: ret void ; ; GFX11-LABEL: @test_atomicrmw_fadd_v2f16_flat_agent_noret__unsafe( ; GFX11-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[PTR:%.*]], align 4 @@ -4265,9 +4265,9 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__unsafe(ptr addrspace( ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] ; GFX90A-NEXT: ret <2 x half> [[RES]] ; -; GFX940-LABEL: @test_atomicrmw_fadd_v2f16_global_agent__unsafe( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] -; GFX940-NEXT: ret <2 x half> [[RES]] +; GFX942-LABEL: @test_atomicrmw_fadd_v2f16_global_agent__unsafe( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] +; GFX942-NEXT: ret <2 x half> [[RES]] ; ; GFX11-LABEL: @test_atomicrmw_fadd_v2f16_global_agent__unsafe( ; GFX11-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR:%.*]], align 4 @@ -4330,9 +4330,9 @@ define void @test_atomicrmw_fadd_v2f16_global_agent_noret__unsafe(ptr addrspace( ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: @test_atomicrmw_fadd_v2f16_global_agent_noret__unsafe( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] -; GFX940-NEXT: ret void +; GFX942-LABEL: @test_atomicrmw_fadd_v2f16_global_agent_noret__unsafe( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] +; GFX942-NEXT: ret void ; ; GFX11-LABEL: @test_atomicrmw_fadd_v2f16_global_agent_noret__unsafe( ; GFX11-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR:%.*]], align 4 @@ -4419,9 +4419,9 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_flat_agent__unsafe(ptr %ptr, <2 ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x bfloat> [[TMP5]] ; -; GFX940-LABEL: @test_atomicrmw_fadd_v2bf16_flat_agent__unsafe( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] -; GFX940-NEXT: ret <2 x bfloat> [[RES]] +; GFX942-LABEL: @test_atomicrmw_fadd_v2bf16_flat_agent__unsafe( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] +; GFX942-NEXT: ret <2 x bfloat> [[RES]] ; ; GFX11-LABEL: @test_atomicrmw_fadd_v2bf16_flat_agent__unsafe( ; GFX11-NEXT: [[TMP1:%.*]] = load <2 x bfloat>, ptr [[PTR:%.*]], align 4 @@ -4508,9 +4508,9 @@ define void @test_atomicrmw_fadd_v2bf16_flat_agent_noret__unsafe(ptr %ptr, <2 x ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: @test_atomicrmw_fadd_v2bf16_flat_agent_noret__unsafe( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] -; GFX940-NEXT: ret void +; GFX942-LABEL: @test_atomicrmw_fadd_v2bf16_flat_agent_noret__unsafe( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] +; GFX942-NEXT: ret void ; ; GFX11-LABEL: @test_atomicrmw_fadd_v2bf16_flat_agent_noret__unsafe( ; GFX11-NEXT: [[TMP1:%.*]] = load <2 x bfloat>, ptr [[PTR:%.*]], align 4 @@ -4597,9 +4597,9 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__unsafe(ptr addrspa ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x bfloat> [[TMP5]] ; -; GFX940-LABEL: @test_atomicrmw_fadd_v2bf16_global_agent__unsafe( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] -; GFX940-NEXT: ret <2 x bfloat> [[RES]] +; GFX942-LABEL: @test_atomicrmw_fadd_v2bf16_global_agent__unsafe( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] +; GFX942-NEXT: ret <2 x bfloat> [[RES]] ; ; GFX11-LABEL: @test_atomicrmw_fadd_v2bf16_global_agent__unsafe( ; GFX11-NEXT: [[TMP1:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR:%.*]], align 4 @@ -4686,9 +4686,9 @@ define void @test_atomicrmw_fadd_v2bf16_global_agent_noret__unsafe(ptr addrspace ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret void ; -; GFX940-LABEL: @test_atomicrmw_fadd_v2bf16_global_agent_noret__unsafe( -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] -; GFX940-NEXT: ret void +; GFX942-LABEL: @test_atomicrmw_fadd_v2bf16_global_agent_noret__unsafe( +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]] +; GFX942-NEXT: ret void ; ; GFX11-LABEL: @test_atomicrmw_fadd_v2bf16_global_agent_noret__unsafe( ; GFX11-NEXT: [[TMP1:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR:%.*]], align 4 diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-v2bf16-agent.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-v2bf16-agent.ll index b4025c3cfb53c..fdb4c592a6130 100644 --- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-v2bf16-agent.ll +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-v2bf16-agent.ll @@ -3,7 +3,7 @@ ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX906 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX908 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX90A %s -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX940 %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX942 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX10 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX11 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX12 %s @@ -109,10 +109,10 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent(ptr addrspace(1) %p ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x bfloat> [[TMP5]] ; -; GFX940-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] syncscope("agent") seq_cst, align 4 -; GFX940-NEXT: ret <2 x bfloat> [[RES]] +; GFX942-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] syncscope("agent") seq_cst, align 4 +; GFX942-NEXT: ret <2 x bfloat> [[RES]] ; ; GFX10-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] { @@ -226,10 +226,10 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_no_fine_gra ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x bfloat> [[TMP5]] ; -; GFX940-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_no_fine_grained_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]] -; GFX940-NEXT: ret <2 x bfloat> [[RES]] +; GFX942-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_no_fine_grained_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]] +; GFX942-NEXT: ret <2 x bfloat> [[RES]] ; ; GFX10-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_no_fine_grained_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { @@ -343,10 +343,10 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_no_remote_m ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x bfloat> [[TMP5]] ; -; GFX940-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret <2 x bfloat> [[RES]] +; GFX942-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret <2 x bfloat> [[RES]] ; ; GFX10-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { @@ -460,10 +460,10 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_no_fine_gra ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x bfloat> [[TMP5]] ; -; GFX940-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret <2 x bfloat> [[RES]] +; GFX942-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret <2 x bfloat> [[RES]] ; ; GFX10-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { @@ -577,10 +577,10 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_no_fine_gra ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x bfloat> [[TMP5]] ; -; GFX940-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_v2bf16_daz( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret <2 x bfloat> [[RES]] +; GFX942-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_v2bf16_daz( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret <2 x bfloat> [[RES]] ; ; GFX10-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_v2bf16_daz( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { @@ -694,10 +694,10 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_no_fine_gra ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x bfloat> [[TMP5]] ; -; GFX940-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_v2bf16_dynamic( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret <2 x bfloat> [[RES]] +; GFX942-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_v2bf16_dynamic( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret <2 x bfloat> [[RES]] ; ; GFX10-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_v2bf16_dynamic( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { @@ -811,10 +811,10 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_ignore_deno ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x bfloat> [[TMP5]] ; -; GFX940-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_ignore_denormal_mode( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret <2 x bfloat> [[RES]] +; GFX942-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_ignore_denormal_mode( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret <2 x bfloat> [[RES]] ; ; GFX10-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_ignore_denormal_mode( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { @@ -928,10 +928,10 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_ignore_deno ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x bfloat> [[TMP5]] ; -; GFX940-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret <2 x bfloat> [[RES]] +; GFX942-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret <2 x bfloat> [[RES]] ; ; GFX10-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { @@ -1045,10 +1045,10 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_ignore_deno ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x bfloat> [[TMP5]] ; -; GFX940-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret <2 x bfloat> [[RES]] +; GFX942-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret <2 x bfloat> [[RES]] ; ; GFX10-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { @@ -1162,10 +1162,10 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_ignore_deno ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x bfloat> [[TMP5]] ; -; GFX940-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret <2 x bfloat> [[RES]] +; GFX942-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret <2 x bfloat> [[RES]] ; ; GFX10-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { @@ -1279,10 +1279,10 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_ignore_deno ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x bfloat> [[TMP5]] ; -; GFX940-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR1]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret <2 x bfloat> [[RES]] +; GFX942-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR1]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret <2 x bfloat> [[RES]] ; ; GFX10-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR1]] { @@ -1396,10 +1396,10 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_ignore_deno ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x bfloat> [[TMP5]] ; -; GFX940-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR2]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret <2 x bfloat> [[RES]] +; GFX942-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR2]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret <2 x bfloat> [[RES]] ; ; GFX10-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR2]] { @@ -1997,7 +1997,7 @@ attributes #1 = { "denormal-fp-mode"="dynamic,dynamic" } ;. ; GFX90A: [[META0]] = !{} ;. -; GFX940: [[META0]] = !{} +; GFX942: [[META0]] = !{} ;. ; GFX10: [[META0]] = !{} ;. diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-v2bf16-system.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-v2bf16-system.ll index 6700839d81480..2401418807788 100644 --- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-v2bf16-system.ll +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-v2bf16-system.ll @@ -3,7 +3,7 @@ ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX906 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX908 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX90A %s -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX940 %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX942 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX10 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX11 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX12 %s @@ -131,10 +131,10 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_no_fine_gr ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x bfloat> [[TMP5]] ; -; GFX940-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_no_fine_grained_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]] -; GFX940-NEXT: ret <2 x bfloat> [[RES]] +; GFX942-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_no_fine_grained_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]] +; GFX942-NEXT: ret <2 x bfloat> [[RES]] ; ; GFX10-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_no_fine_grained_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { @@ -248,10 +248,10 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_no_remote_ ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x bfloat> [[TMP5]] ; -; GFX940-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret <2 x bfloat> [[RES]] +; GFX942-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret <2 x bfloat> [[RES]] ; ; GFX10-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { @@ -365,10 +365,10 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_no_fine_gr ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x bfloat> [[TMP5]] ; -; GFX940-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret <2 x bfloat> [[RES]] +; GFX942-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret <2 x bfloat> [[RES]] ; ; GFX10-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { @@ -482,10 +482,10 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_no_fine_gr ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x bfloat> [[TMP5]] ; -; GFX940-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_v2bf16_daz( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret <2 x bfloat> [[RES]] +; GFX942-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_v2bf16_daz( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret <2 x bfloat> [[RES]] ; ; GFX10-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_v2bf16_daz( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { @@ -599,10 +599,10 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_no_fine_gr ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x bfloat> [[TMP5]] ; -; GFX940-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_v2bf16_dynamic( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret <2 x bfloat> [[RES]] +; GFX942-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_v2bf16_dynamic( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret <2 x bfloat> [[RES]] ; ; GFX10-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_v2bf16_dynamic( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { @@ -738,10 +738,10 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_ignore_den ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x bfloat> [[TMP5]] ; -; GFX940-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret <2 x bfloat> [[RES]] +; GFX942-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret <2 x bfloat> [[RES]] ; ; GFX10-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { @@ -855,10 +855,10 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_ignore_den ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x bfloat> [[TMP5]] ; -; GFX940-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret <2 x bfloat> [[RES]] +; GFX942-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret <2 x bfloat> [[RES]] ; ; GFX10-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { @@ -972,10 +972,10 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_ignore_den ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x bfloat> [[TMP5]] ; -; GFX940-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret <2 x bfloat> [[RES]] +; GFX942-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret <2 x bfloat> [[RES]] ; ; GFX10-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { @@ -1089,10 +1089,10 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_ignore_den ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x bfloat> [[TMP5]] ; -; GFX940-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR1]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret <2 x bfloat> [[RES]] +; GFX942-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR1]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret <2 x bfloat> [[RES]] ; ; GFX10-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR1]] { @@ -1206,10 +1206,10 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_ignore_den ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x bfloat> [[TMP5]] ; -; GFX940-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR2]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret <2 x bfloat> [[RES]] +; GFX942-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR2]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret <2 x bfloat> [[RES]] ; ; GFX10-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR2]] { @@ -1807,7 +1807,7 @@ attributes #1 = { "denormal-fp-mode"="dynamic,dynamic" } ;. ; GFX90A: [[META0]] = !{} ;. -; GFX940: [[META0]] = !{} +; GFX942: [[META0]] = !{} ;. ; GFX10: [[META0]] = !{} ;. diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-v2f16-agent.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-v2f16-agent.ll index c28e11f60f389..59433db38443e 100644 --- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-v2f16-agent.ll +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-v2f16-agent.ll @@ -3,7 +3,7 @@ ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX906 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX908 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX90A %s -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX940 %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX942 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX10 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX11 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX12 %s @@ -109,10 +109,10 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent(ptr addrspace(1) %ptr, ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x half> [[RES]] ; -; GFX940-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4 -; GFX940-NEXT: ret <2 x half> [[RES]] +; GFX942-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4 +; GFX942-NEXT: ret <2 x half> [[RES]] ; ; GFX10-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] { @@ -214,10 +214,10 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_no_fine_graine ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]] ; GFX90A-NEXT: ret <2 x half> [[RES]] ; -; GFX940-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_no_fine_grained_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]] -; GFX940-NEXT: ret <2 x half> [[RES]] +; GFX942-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_no_fine_grained_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]] +; GFX942-NEXT: ret <2 x half> [[RES]] ; ; GFX10-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_no_fine_grained_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { @@ -331,10 +331,10 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_no_remote_memo ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x half> [[RES]] ; -; GFX940-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret <2 x half> [[RES]] +; GFX942-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret <2 x half> [[RES]] ; ; GFX10-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { @@ -436,10 +436,10 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_no_fine_graine ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] ; GFX90A-NEXT: ret <2 x half> [[RES]] ; -; GFX940-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret <2 x half> [[RES]] +; GFX942-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret <2 x half> [[RES]] ; ; GFX10-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { @@ -541,10 +541,10 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_no_fine_graine ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] ; GFX90A-NEXT: ret <2 x half> [[RES]] ; -; GFX940-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_v2f16_daz( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret <2 x half> [[RES]] +; GFX942-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_v2f16_daz( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret <2 x half> [[RES]] ; ; GFX10-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_v2f16_daz( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { @@ -646,10 +646,10 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_no_fine_graine ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] ; GFX90A-NEXT: ret <2 x half> [[RES]] ; -; GFX940-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_v2f16_dynamic( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret <2 x half> [[RES]] +; GFX942-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_v2f16_dynamic( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret <2 x half> [[RES]] ; ; GFX10-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_v2f16_dynamic( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { @@ -763,10 +763,10 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_ignore_denorma ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x half> [[RES]] ; -; GFX940-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_ignore_denormal_mode( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret <2 x half> [[RES]] +; GFX942-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_ignore_denormal_mode( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret <2 x half> [[RES]] ; ; GFX10-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_ignore_denormal_mode( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { @@ -868,10 +868,10 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_ignore_denorma ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] ; GFX90A-NEXT: ret <2 x half> [[RES]] ; -; GFX940-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret <2 x half> [[RES]] +; GFX942-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret <2 x half> [[RES]] ; ; GFX10-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { @@ -985,10 +985,10 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_ignore_denorma ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x half> [[RES]] ; -; GFX940-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret <2 x half> [[RES]] +; GFX942-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret <2 x half> [[RES]] ; ; GFX10-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { @@ -1090,10 +1090,10 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_ignore_denorma ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] ; GFX90A-NEXT: ret <2 x half> [[RES]] ; -; GFX940-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret <2 x half> [[RES]] +; GFX942-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret <2 x half> [[RES]] ; ; GFX10-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { @@ -1195,10 +1195,10 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_ignore_denorma ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] ; GFX90A-NEXT: ret <2 x half> [[RES]] ; -; GFX940-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR1]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret <2 x half> [[RES]] +; GFX942-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR1]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret <2 x half> [[RES]] ; ; GFX10-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR1]] { @@ -1300,10 +1300,10 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_ignore_denorma ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] ; GFX90A-NEXT: ret <2 x half> [[RES]] ; -; GFX940-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR2]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret <2 x half> [[RES]] +; GFX942-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR2]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret <2 x half> [[RES]] ; ; GFX10-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR2]] { @@ -1901,7 +1901,7 @@ attributes #1 = { "denormal-fp-mode"="dynamic,dynamic" } ;. ; GFX90A: [[META0]] = !{} ;. -; GFX940: [[META0]] = !{} +; GFX942: [[META0]] = !{} ;. ; GFX10: [[META0]] = !{} ;. diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-v2f16-system.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-v2f16-system.ll index 19f02dea21329..148261cd0a678 100644 --- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-v2f16-system.ll +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-v2f16-system.ll @@ -3,7 +3,7 @@ ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX906 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX908 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX90A %s -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX940 %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX942 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX10 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX11 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=atomic-expand %s | FileCheck -check-prefixes=COMMON,GFX12 %s @@ -119,10 +119,10 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_no_fine_grain ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]] ; GFX90A-NEXT: ret <2 x half> [[RES]] ; -; GFX940-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_no_fine_grained_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]] -; GFX940-NEXT: ret <2 x half> [[RES]] +; GFX942-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_no_fine_grained_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]] +; GFX942-NEXT: ret <2 x half> [[RES]] ; ; GFX10-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_no_fine_grained_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { @@ -236,10 +236,10 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_no_remote_mem ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x half> [[RES]] ; -; GFX940-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret <2 x half> [[RES]] +; GFX942-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret <2 x half> [[RES]] ; ; GFX10-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { @@ -341,10 +341,10 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_no_fine_grain ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] ; GFX90A-NEXT: ret <2 x half> [[RES]] ; -; GFX940-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret <2 x half> [[RES]] +; GFX942-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret <2 x half> [[RES]] ; ; GFX10-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { @@ -446,10 +446,10 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_no_fine_grain ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] ; GFX90A-NEXT: ret <2 x half> [[RES]] ; -; GFX940-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_v2f16_daz( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret <2 x half> [[RES]] +; GFX942-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_v2f16_daz( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret <2 x half> [[RES]] ; ; GFX10-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_v2f16_daz( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { @@ -551,10 +551,10 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_no_fine_grain ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] ; GFX90A-NEXT: ret <2 x half> [[RES]] ; -; GFX940-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_v2f16_dynamic( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] -; GFX940-NEXT: ret <2 x half> [[RES]] +; GFX942-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_v2f16_dynamic( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] +; GFX942-NEXT: ret <2 x half> [[RES]] ; ; GFX10-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_v2f16_dynamic( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { @@ -678,10 +678,10 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_ignore_denorm ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] ; GFX90A-NEXT: ret <2 x half> [[RES]] ; -; GFX940-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret <2 x half> [[RES]] +; GFX942-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret <2 x half> [[RES]] ; ; GFX10-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { @@ -795,10 +795,10 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_ignore_denorm ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x half> [[RES]] ; -; GFX940-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret <2 x half> [[RES]] +; GFX942-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret <2 x half> [[RES]] ; ; GFX10-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { @@ -900,10 +900,10 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_ignore_denorm ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] ; GFX90A-NEXT: ret <2 x half> [[RES]] ; -; GFX940-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret <2 x half> [[RES]] +; GFX942-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret <2 x half> [[RES]] ; ; GFX10-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { @@ -1005,10 +1005,10 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_ignore_denorm ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] ; GFX90A-NEXT: ret <2 x half> [[RES]] ; -; GFX940-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR1]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret <2 x half> [[RES]] +; GFX942-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR1]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret <2 x half> [[RES]] ; ; GFX10-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR1]] { @@ -1110,10 +1110,10 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_ignore_denorm ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] ; GFX90A-NEXT: ret <2 x half> [[RES]] ; -; GFX940-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR2]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] -; GFX940-NEXT: ret <2 x half> [[RES]] +; GFX942-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR2]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] +; GFX942-NEXT: ret <2 x half> [[RES]] ; ; GFX10-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_system__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR2]] { @@ -1711,7 +1711,7 @@ attributes #1 = { "denormal-fp-mode"="dynamic,dynamic" } ;. ; GFX90A: [[META0]] = !{} ;. -; GFX940: [[META0]] = !{} +; GFX942: [[META0]] = !{} ;. ; GFX10: [[META0]] = !{} ;. diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-flat-noalias-addrspace.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-flat-noalias-addrspace.ll index 380f376ce9c80..7692fd34312ff 100644 --- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-flat-noalias-addrspace.ll +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-flat-noalias-addrspace.ll @@ -3,8 +3,8 @@ ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=atomic-expand %s | FileCheck -check-prefixes=ALL,GFX900 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -passes=atomic-expand %s | FileCheck -check-prefixes=ALL,GFX908 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes=atomic-expand %s | FileCheck -check-prefixes=ALL,GFX90A %s -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -passes=atomic-expand %s | FileCheck -check-prefixes=ALL,GFX940 %s -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -passes=atomic-expand %s | FileCheck -check-prefixes=ALL,GFX940 %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=atomic-expand %s | FileCheck -check-prefixes=ALL,GFX942 %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=atomic-expand %s | FileCheck -check-prefixes=ALL,GFX942 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=atomic-expand %s | FileCheck -check-prefixes=ALL,GFX12 %s ; -------------------------------------------------------------------- @@ -594,24 +594,24 @@ define double @test_flat_atomicrmw_fadd_f64_agent(ptr %ptr, double %value) { ; GFX90A: [[ATOMICRMW_END]]: ; GFX90A-NEXT: ret double [[RES]] ; -; GFX940-LABEL: define double @test_flat_atomicrmw_fadd_f64_agent( -; GFX940-SAME: ptr [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[PTR]]) -; GFX940-NEXT: br i1 [[IS_PRIVATE]], label %[[ATOMICRMW_PRIVATE:.*]], label %[[ATOMICRMW_GLOBAL:.*]] -; GFX940: [[ATOMICRMW_PRIVATE]]: -; GFX940-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(5) -; GFX940-NEXT: [[LOADED_PRIVATE:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8 -; GFX940-NEXT: [[NEW:%.*]] = fadd double [[LOADED_PRIVATE]], [[VALUE]] -; GFX940-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP1]], align 8 -; GFX940-NEXT: br label %[[ATOMICRMW_PHI:.*]] -; GFX940: [[ATOMICRMW_GLOBAL]]: -; GFX940-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !noalias.addrspace [[META0]], !amdgpu.no.fine.grained.memory [[META1]] -; GFX940-NEXT: br label %[[ATOMICRMW_PHI]] -; GFX940: [[ATOMICRMW_PHI]]: -; GFX940-NEXT: [[RES:%.*]] = phi double [ [[LOADED_PRIVATE]], %[[ATOMICRMW_PRIVATE]] ], [ [[TMP2]], %[[ATOMICRMW_GLOBAL]] ] -; GFX940-NEXT: br label %[[ATOMICRMW_END:.*]] -; GFX940: [[ATOMICRMW_END]]: -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: define double @test_flat_atomicrmw_fadd_f64_agent( +; GFX942-SAME: ptr [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[PTR]]) +; GFX942-NEXT: br i1 [[IS_PRIVATE]], label %[[ATOMICRMW_PRIVATE:.*]], label %[[ATOMICRMW_GLOBAL:.*]] +; GFX942: [[ATOMICRMW_PRIVATE]]: +; GFX942-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(5) +; GFX942-NEXT: [[LOADED_PRIVATE:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8 +; GFX942-NEXT: [[NEW:%.*]] = fadd double [[LOADED_PRIVATE]], [[VALUE]] +; GFX942-NEXT: store double [[NEW]], ptr addrspace(5) [[TMP1]], align 8 +; GFX942-NEXT: br label %[[ATOMICRMW_PHI:.*]] +; GFX942: [[ATOMICRMW_GLOBAL]]: +; GFX942-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !noalias.addrspace [[META0]], !amdgpu.no.fine.grained.memory [[META1]] +; GFX942-NEXT: br label %[[ATOMICRMW_PHI]] +; GFX942: [[ATOMICRMW_PHI]]: +; GFX942-NEXT: [[RES:%.*]] = phi double [ [[LOADED_PRIVATE]], %[[ATOMICRMW_PRIVATE]] ], [ [[TMP2]], %[[ATOMICRMW_GLOBAL]] ] +; GFX942-NEXT: br label %[[ATOMICRMW_END:.*]] +; GFX942: [[ATOMICRMW_END]]: +; GFX942-NEXT: ret double [[RES]] ; ; GFX12-LABEL: define double @test_flat_atomicrmw_fadd_f64_agent( ; GFX12-SAME: ptr [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { @@ -705,10 +705,10 @@ define double @test_flat_atomicrmw_fadd_f64_agent__noalias_addrspace_5(ptr %ptr, ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !noalias.addrspace [[META0]], !amdgpu.no.fine.grained.memory [[META1]] ; GFX90A-NEXT: ret double [[RES]] ; -; GFX940-LABEL: define double @test_flat_atomicrmw_fadd_f64_agent__noalias_addrspace_5( -; GFX940-SAME: ptr [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !noalias.addrspace [[META0]], !amdgpu.no.fine.grained.memory [[META1]] -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: define double @test_flat_atomicrmw_fadd_f64_agent__noalias_addrspace_5( +; GFX942-SAME: ptr [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !noalias.addrspace [[META0]], !amdgpu.no.fine.grained.memory [[META1]] +; GFX942-NEXT: ret double [[RES]] ; ; GFX12-LABEL: define double @test_flat_atomicrmw_fadd_f64_agent__noalias_addrspace_5( ; GFX12-SAME: ptr [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { @@ -800,10 +800,10 @@ define double @test_flat_atomicrmw_fadd_f64_agent__noalias_addrspace_5__maybe_fi ; GFX90A: [[ATOMICRMW_END]]: ; GFX90A-NEXT: ret double [[TMP5]] ; -; GFX940-LABEL: define double @test_flat_atomicrmw_fadd_f64_agent__noalias_addrspace_5__maybe_fine_grained( -; GFX940-SAME: ptr [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !noalias.addrspace [[META0]] -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: define double @test_flat_atomicrmw_fadd_f64_agent__noalias_addrspace_5__maybe_fine_grained( +; GFX942-SAME: ptr [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !noalias.addrspace [[META0]] +; GFX942-NEXT: ret double [[RES]] ; ; GFX12-LABEL: define double @test_flat_atomicrmw_fadd_f64_agent__noalias_addrspace_5__maybe_fine_grained( ; GFX12-SAME: ptr [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { @@ -905,10 +905,10 @@ define float @test_flat_atomicrmw_fadd_f32_agent__noalias_addrspace_5(ptr %ptr, ; GFX90A: [[ATOMICRMW_END]]: ; GFX90A-NEXT: ret float [[LOADED_PHI]] ; -; GFX940-LABEL: define float @test_flat_atomicrmw_fadd_f32_agent__noalias_addrspace_5( -; GFX940-SAME: ptr [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !noalias.addrspace [[META0]], !amdgpu.no.fine.grained.memory [[META1]], !amdgpu.ignore.denormal.mode [[META1]] -; GFX940-NEXT: ret float [[RES]] +; GFX942-LABEL: define float @test_flat_atomicrmw_fadd_f32_agent__noalias_addrspace_5( +; GFX942-SAME: ptr [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !noalias.addrspace [[META0]], !amdgpu.no.fine.grained.memory [[META1]], !amdgpu.ignore.denormal.mode [[META1]] +; GFX942-NEXT: ret float [[RES]] ; ; GFX12-LABEL: define float @test_flat_atomicrmw_fadd_f32_agent__noalias_addrspace_5( ; GFX12-SAME: ptr [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -988,10 +988,10 @@ define <2 x half> @test_flat_atomicrmw_fadd_v2f16_agent__noalias_addrspace_5(ptr ; GFX90A: [[ATOMICRMW_END]]: ; GFX90A-NEXT: ret <2 x half> [[TMP5]] ; -; GFX940-LABEL: define <2 x half> @test_flat_atomicrmw_fadd_v2f16_agent__noalias_addrspace_5( -; GFX940-SAME: ptr [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4, !noalias.addrspace [[META0]], !amdgpu.no.fine.grained.memory [[META1]] -; GFX940-NEXT: ret <2 x half> [[RES]] +; GFX942-LABEL: define <2 x half> @test_flat_atomicrmw_fadd_v2f16_agent__noalias_addrspace_5( +; GFX942-SAME: ptr [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4, !noalias.addrspace [[META0]], !amdgpu.no.fine.grained.memory [[META1]] +; GFX942-NEXT: ret <2 x half> [[RES]] ; ; GFX12-LABEL: define <2 x half> @test_flat_atomicrmw_fadd_v2f16_agent__noalias_addrspace_5( ; GFX12-SAME: ptr [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { @@ -1071,10 +1071,10 @@ define <2 x bfloat> @test_flat_atomicrmw_fadd_v2bf16_agent__noalias_addrspace_5( ; GFX90A: [[ATOMICRMW_END]]: ; GFX90A-NEXT: ret <2 x bfloat> [[TMP5]] ; -; GFX940-LABEL: define <2 x bfloat> @test_flat_atomicrmw_fadd_v2bf16_agent__noalias_addrspace_5( -; GFX940-SAME: ptr [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[PTR]], <2 x bfloat> [[VALUE]] syncscope("agent") seq_cst, align 4, !noalias.addrspace [[META0]], !amdgpu.no.fine.grained.memory [[META1]] -; GFX940-NEXT: ret <2 x bfloat> [[RES]] +; GFX942-LABEL: define <2 x bfloat> @test_flat_atomicrmw_fadd_v2bf16_agent__noalias_addrspace_5( +; GFX942-SAME: ptr [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[PTR]], <2 x bfloat> [[VALUE]] syncscope("agent") seq_cst, align 4, !noalias.addrspace [[META0]], !amdgpu.no.fine.grained.memory [[META1]] +; GFX942-NEXT: ret <2 x bfloat> [[RES]] ; ; GFX12-LABEL: define <2 x bfloat> @test_flat_atomicrmw_fadd_v2bf16_agent__noalias_addrspace_5( ; GFX12-SAME: ptr [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { @@ -1190,24 +1190,24 @@ define double @test_flat_atomicrmw_fmin_f64_agent(ptr %ptr, double %value) { ; GFX90A: [[ATOMICRMW_END]]: ; GFX90A-NEXT: ret double [[RES]] ; -; GFX940-LABEL: define double @test_flat_atomicrmw_fmin_f64_agent( -; GFX940-SAME: ptr [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[PTR]]) -; GFX940-NEXT: br i1 [[IS_PRIVATE]], label %[[ATOMICRMW_PRIVATE:.*]], label %[[ATOMICRMW_GLOBAL:.*]] -; GFX940: [[ATOMICRMW_PRIVATE]]: -; GFX940-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(5) -; GFX940-NEXT: [[LOADED_PRIVATE:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8 -; GFX940-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED_PRIVATE]], double [[VALUE]]) -; GFX940-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP1]], align 8 -; GFX940-NEXT: br label %[[ATOMICRMW_PHI:.*]] -; GFX940: [[ATOMICRMW_GLOBAL]]: -; GFX940-NEXT: [[TMP3:%.*]] = atomicrmw fmin ptr [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !noalias.addrspace [[META0]], !amdgpu.no.fine.grained.memory [[META1]] -; GFX940-NEXT: br label %[[ATOMICRMW_PHI]] -; GFX940: [[ATOMICRMW_PHI]]: -; GFX940-NEXT: [[RES:%.*]] = phi double [ [[LOADED_PRIVATE]], %[[ATOMICRMW_PRIVATE]] ], [ [[TMP3]], %[[ATOMICRMW_GLOBAL]] ] -; GFX940-NEXT: br label %[[ATOMICRMW_END:.*]] -; GFX940: [[ATOMICRMW_END]]: -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: define double @test_flat_atomicrmw_fmin_f64_agent( +; GFX942-SAME: ptr [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[PTR]]) +; GFX942-NEXT: br i1 [[IS_PRIVATE]], label %[[ATOMICRMW_PRIVATE:.*]], label %[[ATOMICRMW_GLOBAL:.*]] +; GFX942: [[ATOMICRMW_PRIVATE]]: +; GFX942-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(5) +; GFX942-NEXT: [[LOADED_PRIVATE:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8 +; GFX942-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED_PRIVATE]], double [[VALUE]]) +; GFX942-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP1]], align 8 +; GFX942-NEXT: br label %[[ATOMICRMW_PHI:.*]] +; GFX942: [[ATOMICRMW_GLOBAL]]: +; GFX942-NEXT: [[TMP3:%.*]] = atomicrmw fmin ptr [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !noalias.addrspace [[META0]], !amdgpu.no.fine.grained.memory [[META1]] +; GFX942-NEXT: br label %[[ATOMICRMW_PHI]] +; GFX942: [[ATOMICRMW_PHI]]: +; GFX942-NEXT: [[RES:%.*]] = phi double [ [[LOADED_PRIVATE]], %[[ATOMICRMW_PRIVATE]] ], [ [[TMP3]], %[[ATOMICRMW_GLOBAL]] ] +; GFX942-NEXT: br label %[[ATOMICRMW_END:.*]] +; GFX942: [[ATOMICRMW_END]]: +; GFX942-NEXT: ret double [[RES]] ; ; GFX12-LABEL: define double @test_flat_atomicrmw_fmin_f64_agent( ; GFX12-SAME: ptr [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { @@ -1289,10 +1289,10 @@ define double @test_flat_atomicrmw_fmin_f64_agent__noalias_addrspace_5(ptr %ptr, ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmin ptr [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !noalias.addrspace [[META0]], !amdgpu.no.fine.grained.memory [[META1]] ; GFX90A-NEXT: ret double [[RES]] ; -; GFX940-LABEL: define double @test_flat_atomicrmw_fmin_f64_agent__noalias_addrspace_5( -; GFX940-SAME: ptr [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fmin ptr [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !noalias.addrspace [[META0]], !amdgpu.no.fine.grained.memory [[META1]] -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: define double @test_flat_atomicrmw_fmin_f64_agent__noalias_addrspace_5( +; GFX942-SAME: ptr [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmin ptr [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !noalias.addrspace [[META0]], !amdgpu.no.fine.grained.memory [[META1]] +; GFX942-NEXT: ret double [[RES]] ; ; GFX12-LABEL: define double @test_flat_atomicrmw_fmin_f64_agent__noalias_addrspace_5( ; GFX12-SAME: ptr [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { @@ -1384,10 +1384,10 @@ define double @test_flat_atomicrmw_fmin_f64_agent__noalias_addrspace_5__maybe_fi ; GFX90A: [[ATOMICRMW_END]]: ; GFX90A-NEXT: ret double [[TMP5]] ; -; GFX940-LABEL: define double @test_flat_atomicrmw_fmin_f64_agent__noalias_addrspace_5__maybe_fine_grained( -; GFX940-SAME: ptr [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fmin ptr [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !noalias.addrspace [[META0]] -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: define double @test_flat_atomicrmw_fmin_f64_agent__noalias_addrspace_5__maybe_fine_grained( +; GFX942-SAME: ptr [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmin ptr [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !noalias.addrspace [[META0]] +; GFX942-NEXT: ret double [[RES]] ; ; GFX12-LABEL: define double @test_flat_atomicrmw_fmin_f64_agent__noalias_addrspace_5__maybe_fine_grained( ; GFX12-SAME: ptr [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { @@ -1467,22 +1467,22 @@ define float @test_flat_atomicrmw_fmin_f32_agent__noalias_addrspace_5(ptr %ptr, ; GFX90A: [[ATOMICRMW_END]]: ; GFX90A-NEXT: ret float [[LOADED_PHI]] ; -; GFX940-LABEL: define float @test_flat_atomicrmw_fmin_f32_agent__noalias_addrspace_5( -; GFX940-SAME: ptr [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr [[PTR]], align 4 -; GFX940-NEXT: br label %[[ATOMICRMW_START:.*]] -; GFX940: [[ATOMICRMW_START]]: -; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], %[[ATOMICRMW_START]] ] -; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) -; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 -; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 -; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !noalias.addrspace [[META0]], !amdgpu.no.fine.grained.memory [[META1]] -; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 -; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 -; GFX940-NEXT: [[RES]] = bitcast i32 [[NEWLOADED]] to float -; GFX940-NEXT: br i1 [[SUCCESS]], label %[[ATOMICRMW_END:.*]], label %[[ATOMICRMW_START]] -; GFX940: [[ATOMICRMW_END]]: -; GFX940-NEXT: ret float [[RES]] +; GFX942-LABEL: define float @test_flat_atomicrmw_fmin_f32_agent__noalias_addrspace_5( +; GFX942-SAME: ptr [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[TMP1:%.*]] = load float, ptr [[PTR]], align 4 +; GFX942-NEXT: br label %[[ATOMICRMW_START:.*]] +; GFX942: [[ATOMICRMW_START]]: +; GFX942-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], %[[ATOMICRMW_START]] ] +; GFX942-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE]]) +; GFX942-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 +; GFX942-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX942-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !noalias.addrspace [[META0]], !amdgpu.no.fine.grained.memory [[META1]] +; GFX942-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX942-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX942-NEXT: [[RES]] = bitcast i32 [[NEWLOADED]] to float +; GFX942-NEXT: br i1 [[SUCCESS]], label %[[ATOMICRMW_END:.*]], label %[[ATOMICRMW_START]] +; GFX942: [[ATOMICRMW_END]]: +; GFX942-NEXT: ret float [[RES]] ; ; GFX12-LABEL: define float @test_flat_atomicrmw_fmin_f32_agent__noalias_addrspace_5( ; GFX12-SAME: ptr [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -1598,24 +1598,24 @@ define double @test_flat_atomicrmw_fmax_f64_agent(ptr %ptr, double %value) { ; GFX90A: [[ATOMICRMW_END]]: ; GFX90A-NEXT: ret double [[RES]] ; -; GFX940-LABEL: define double @test_flat_atomicrmw_fmax_f64_agent( -; GFX940-SAME: ptr [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[PTR]]) -; GFX940-NEXT: br i1 [[IS_PRIVATE]], label %[[ATOMICRMW_PRIVATE:.*]], label %[[ATOMICRMW_GLOBAL:.*]] -; GFX940: [[ATOMICRMW_PRIVATE]]: -; GFX940-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(5) -; GFX940-NEXT: [[LOADED_PRIVATE:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8 -; GFX940-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED_PRIVATE]], double [[VALUE]]) -; GFX940-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP1]], align 8 -; GFX940-NEXT: br label %[[ATOMICRMW_PHI:.*]] -; GFX940: [[ATOMICRMW_GLOBAL]]: -; GFX940-NEXT: [[TMP3:%.*]] = atomicrmw fmax ptr [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !noalias.addrspace [[META0]], !amdgpu.no.fine.grained.memory [[META1]] -; GFX940-NEXT: br label %[[ATOMICRMW_PHI]] -; GFX940: [[ATOMICRMW_PHI]]: -; GFX940-NEXT: [[RES:%.*]] = phi double [ [[LOADED_PRIVATE]], %[[ATOMICRMW_PRIVATE]] ], [ [[TMP3]], %[[ATOMICRMW_GLOBAL]] ] -; GFX940-NEXT: br label %[[ATOMICRMW_END:.*]] -; GFX940: [[ATOMICRMW_END]]: -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: define double @test_flat_atomicrmw_fmax_f64_agent( +; GFX942-SAME: ptr [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[PTR]]) +; GFX942-NEXT: br i1 [[IS_PRIVATE]], label %[[ATOMICRMW_PRIVATE:.*]], label %[[ATOMICRMW_GLOBAL:.*]] +; GFX942: [[ATOMICRMW_PRIVATE]]: +; GFX942-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(5) +; GFX942-NEXT: [[LOADED_PRIVATE:%.*]] = load double, ptr addrspace(5) [[TMP1]], align 8 +; GFX942-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED_PRIVATE]], double [[VALUE]]) +; GFX942-NEXT: store double [[TMP2]], ptr addrspace(5) [[TMP1]], align 8 +; GFX942-NEXT: br label %[[ATOMICRMW_PHI:.*]] +; GFX942: [[ATOMICRMW_GLOBAL]]: +; GFX942-NEXT: [[TMP3:%.*]] = atomicrmw fmax ptr [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !noalias.addrspace [[META0]], !amdgpu.no.fine.grained.memory [[META1]] +; GFX942-NEXT: br label %[[ATOMICRMW_PHI]] +; GFX942: [[ATOMICRMW_PHI]]: +; GFX942-NEXT: [[RES:%.*]] = phi double [ [[LOADED_PRIVATE]], %[[ATOMICRMW_PRIVATE]] ], [ [[TMP3]], %[[ATOMICRMW_GLOBAL]] ] +; GFX942-NEXT: br label %[[ATOMICRMW_END:.*]] +; GFX942: [[ATOMICRMW_END]]: +; GFX942-NEXT: ret double [[RES]] ; ; GFX12-LABEL: define double @test_flat_atomicrmw_fmax_f64_agent( ; GFX12-SAME: ptr [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { @@ -1697,10 +1697,10 @@ define double @test_flat_atomicrmw_fmax_f64_agent__noalias_addrspace_5(ptr %ptr, ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmax ptr [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !noalias.addrspace [[META0]], !amdgpu.no.fine.grained.memory [[META1]] ; GFX90A-NEXT: ret double [[RES]] ; -; GFX940-LABEL: define double @test_flat_atomicrmw_fmax_f64_agent__noalias_addrspace_5( -; GFX940-SAME: ptr [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fmax ptr [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !noalias.addrspace [[META0]], !amdgpu.no.fine.grained.memory [[META1]] -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: define double @test_flat_atomicrmw_fmax_f64_agent__noalias_addrspace_5( +; GFX942-SAME: ptr [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmax ptr [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !noalias.addrspace [[META0]], !amdgpu.no.fine.grained.memory [[META1]] +; GFX942-NEXT: ret double [[RES]] ; ; GFX12-LABEL: define double @test_flat_atomicrmw_fmax_f64_agent__noalias_addrspace_5( ; GFX12-SAME: ptr [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { @@ -1792,10 +1792,10 @@ define double @test_flat_atomicrmw_fmax_f64_agent__noalias_addrspace_5__maybe_fi ; GFX90A: [[ATOMICRMW_END]]: ; GFX90A-NEXT: ret double [[TMP5]] ; -; GFX940-LABEL: define double @test_flat_atomicrmw_fmax_f64_agent__noalias_addrspace_5__maybe_fine_grained( -; GFX940-SAME: ptr [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fmax ptr [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !noalias.addrspace [[META0]] -; GFX940-NEXT: ret double [[RES]] +; GFX942-LABEL: define double @test_flat_atomicrmw_fmax_f64_agent__noalias_addrspace_5__maybe_fine_grained( +; GFX942-SAME: ptr [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmax ptr [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !noalias.addrspace [[META0]] +; GFX942-NEXT: ret double [[RES]] ; ; GFX12-LABEL: define double @test_flat_atomicrmw_fmax_f64_agent__noalias_addrspace_5__maybe_fine_grained( ; GFX12-SAME: ptr [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { @@ -1875,22 +1875,22 @@ define float @test_flat_atomicrmw_fmax_f32_agent__noalias_addrspace_5(ptr %ptr, ; GFX90A: [[ATOMICRMW_END]]: ; GFX90A-NEXT: ret float [[LOADED_PHI]] ; -; GFX940-LABEL: define float @test_flat_atomicrmw_fmax_f32_agent__noalias_addrspace_5( -; GFX940-SAME: ptr [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[TMP1:%.*]] = load float, ptr [[PTR]], align 4 -; GFX940-NEXT: br label %[[ATOMICRMW_START:.*]] -; GFX940: [[ATOMICRMW_START]]: -; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], %[[ATOMICRMW_START]] ] -; GFX940-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) -; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 -; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 -; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !noalias.addrspace [[META0]], !amdgpu.no.fine.grained.memory [[META1]] -; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 -; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 -; GFX940-NEXT: [[RES]] = bitcast i32 [[NEWLOADED]] to float -; GFX940-NEXT: br i1 [[SUCCESS]], label %[[ATOMICRMW_END:.*]], label %[[ATOMICRMW_START]] -; GFX940: [[ATOMICRMW_END]]: -; GFX940-NEXT: ret float [[RES]] +; GFX942-LABEL: define float @test_flat_atomicrmw_fmax_f32_agent__noalias_addrspace_5( +; GFX942-SAME: ptr [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[TMP1:%.*]] = load float, ptr [[PTR]], align 4 +; GFX942-NEXT: br label %[[ATOMICRMW_START:.*]] +; GFX942: [[ATOMICRMW_START]]: +; GFX942-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], %[[ATOMICRMW_START]] ] +; GFX942-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE]]) +; GFX942-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 +; GFX942-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX942-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4, !noalias.addrspace [[META0]], !amdgpu.no.fine.grained.memory [[META1]] +; GFX942-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX942-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX942-NEXT: [[RES]] = bitcast i32 [[NEWLOADED]] to float +; GFX942-NEXT: br i1 [[SUCCESS]], label %[[ATOMICRMW_END:.*]], label %[[ATOMICRMW_START]] +; GFX942: [[ATOMICRMW_END]]: +; GFX942-NEXT: ret float [[RES]] ; ; GFX12-LABEL: define float @test_flat_atomicrmw_fmax_f32_agent__noalias_addrspace_5( ; GFX12-SAME: ptr [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] { @@ -2033,11 +2033,11 @@ define i32 @test_flat_atomicrmw_nand_i32_agent__noalias_addrspace_5(ptr %ptr, i3 ; GFX90A: [[META3]] = !{!"foo", !"bar"} ; GFX90A: [[META4]] = !{!"bux", !"baz"} ;. -; GFX940: [[META0]] = !{i32 5, i32 6} -; GFX940: [[META1]] = !{} -; GFX940: [[META2]] = !{[[META3:![0-9]+]], [[META4:![0-9]+]]} -; GFX940: [[META3]] = !{!"foo", !"bar"} -; GFX940: [[META4]] = !{!"bux", !"baz"} +; GFX942: [[META0]] = !{i32 5, i32 6} +; GFX942: [[META1]] = !{} +; GFX942: [[META2]] = !{[[META3:![0-9]+]], [[META4:![0-9]+]]} +; GFX942: [[META3]] = !{!"foo", !"bar"} +; GFX942: [[META4]] = !{!"bux", !"baz"} ;. ; GFX12: [[META0]] = !{i32 5, i32 6} ; GFX12: [[META1]] = !{} diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-fp-vector.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-fp-vector.ll index 97c5a77083f5c..5736a3fe38590 100644 --- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-fp-vector.ll +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-fp-vector.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=atomic-expand -mcpu=gfx900 %s | FileCheck -check-prefixes=CHECK,GFX900 %s ; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=atomic-expand -mcpu=gfx90a %s | FileCheck -check-prefixes=CHECK,GFX90A %s -; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=atomic-expand -mcpu=gfx940 %s | FileCheck -check-prefixes=CHECK,GFX940 %s +; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=atomic-expand -mcpu=gfx942 %s | FileCheck -check-prefixes=CHECK,GFX942 %s ;--------------------------------------------------------------------- ; atomicrmw fadd @@ -104,10 +104,10 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent_align4(ptr addrspace(1 ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x half> [[RES]] ; -; GFX940-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent_align4( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4 -; GFX940-NEXT: ret <2 x half> [[RES]] +; GFX942-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent_align4( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x half> [[VALUE]] syncscope("agent") seq_cst, align 4 +; GFX942-NEXT: ret <2 x half> [[RES]] ; %res = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %value syncscope("agent") seq_cst, align 4 ret <2 x half> %res @@ -148,10 +148,10 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent_align4(ptr addrspac ; GFX90A: atomicrmw.end: ; GFX90A-NEXT: ret <2 x bfloat> [[TMP5]] ; -; GFX940-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent_align4( -; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { -; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] syncscope("agent") seq_cst, align 4 -; GFX940-NEXT: ret <2 x bfloat> [[RES]] +; GFX942-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent_align4( +; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { +; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], <2 x bfloat> [[VALUE]] syncscope("agent") seq_cst, align 4 +; GFX942-NEXT: ret <2 x bfloat> [[RES]] ; %res = atomicrmw fadd ptr addrspace(1) %ptr, <2 x bfloat> %value syncscope("agent") seq_cst, align 4 ret <2 x bfloat> %res diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-integer-ops-0-to-add-0.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-integer-ops-0-to-add-0.ll index 2bfcc5897c382..dc1107d9130d5 100644 --- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-integer-ops-0-to-add-0.ll +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-integer-ops-0-to-add-0.ll @@ -4,7 +4,7 @@ ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=atomic-expand -mcpu=gfx90a %s | FileCheck -check-prefixes=CHECK,GFX90A %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=atomic-expand -mcpu=gfx1030 %s | FileCheck -check-prefixes=CHECK,GFX10 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=atomic-expand -mcpu=gfx1100 %s | FileCheck -check-prefixes=CHECK,GFX11 %s -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=atomic-expand -mcpu=gfx940 %s | FileCheck -check-prefixes=CHECK,GFX940 %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=atomic-expand -mcpu=gfx942 %s | FileCheck -check-prefixes=CHECK,GFX942 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=atomic-expand -mcpu=gfx1200 %s | FileCheck -check-prefixes=CHECK,GFX12 %s ; Test that system scoped atomicrmw or 0 is transformed to add 0. @@ -332,4 +332,4 @@ define i32 @test_atomicrmw_sub_0_global_agent__amdgpu_no_fine_grained_memory__am ; GFX803: {{.*}} ; GFX900: {{.*}} ; GFX90A: {{.*}} -; GFX940: {{.*}} +; GFX942: {{.*}} diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/load-i1-misaligned.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/load-i1-misaligned.ll index 6f3d2cb69090e..7fe036df4d92a 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/load-i1-misaligned.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/load-i1-misaligned.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -mtriple=amdgcn-amd-amdhsa --mcpu=gfx940 -passes=load-store-vectorizer -S -o - %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-amd-amdhsa --mcpu=gfx942 -passes=load-store-vectorizer -S -o - %s | FileCheck %s ; Don't crash when checking for misaligned accesses with sub-byte size. diff --git a/llvm/test/tools/llvm-mca/AMDGPU/carried-over.s b/llvm/test/tools/llvm-mca/AMDGPU/carried-over.s index 8317469699a10..4378dc979fc72 100644 --- a/llvm/test/tools/llvm-mca/AMDGPU/carried-over.s +++ b/llvm/test/tools/llvm-mca/AMDGPU/carried-over.s @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=amdgcn -mcpu=gfx940 --timeline --iterations=1 --timeline-max-cycles=0 < %s | FileCheck %s +# RUN: llvm-mca -mtriple=amdgcn -mcpu=gfx942 --timeline --iterations=1 --timeline-max-cycles=0 < %s | FileCheck %s v_pk_mov_b32 v[0:1], v[2:3], v[4:5] v_pk_add_f32 v[0:1], v[0:1], v[0:1] diff --git a/llvm/test/tools/llvm-mca/AMDGPU/gfx940-mfma.s b/llvm/test/tools/llvm-mca/AMDGPU/gfx942-mfma.s similarity index 99% rename from llvm/test/tools/llvm-mca/AMDGPU/gfx940-mfma.s rename to llvm/test/tools/llvm-mca/AMDGPU/gfx942-mfma.s index 0e1efbe90805b..880946e937eb8 100644 --- a/llvm/test/tools/llvm-mca/AMDGPU/gfx940-mfma.s +++ b/llvm/test/tools/llvm-mca/AMDGPU/gfx942-mfma.s @@ -1,4 +1,4 @@ -# RUN: llvm-mca -mtriple=amdgcn -mcpu=gfx940 --timeline --iterations=1 --timeline-max-cycles=0 < %s | FileCheck %s +# RUN: llvm-mca -mtriple=amdgcn -mcpu=gfx942 --timeline --iterations=1 --timeline-max-cycles=0 < %s | FileCheck %s # CHECK: Iterations: 1 # CHECK: Instructions: 78 diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll index 8d5307372a303..0e392e2287151 100644 --- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll @@ -157,16 +157,6 @@ define amdgpu_kernel void @test_kernel() { ; RUN: llvm-objdump -D %t.o > %t-detect.txt ; RUN: diff %t-specify.txt %t-detect.txt -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx941 -filetype=obj -O0 -o %t.o %s -; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx941 %t.o > %t-specify.txt -; RUN: llvm-objdump -D %t.o > %t-detect.txt -; RUN: diff %t-specify.txt %t-detect.txt - -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=obj -O0 -o %t.o %s -; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx940 %t.o > %t-specify.txt -; RUN: llvm-objdump -D %t.o > %t-detect.txt -; RUN: diff %t-specify.txt %t-detect.txt - ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c -filetype=obj -O0 -o %t.o %s ; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx90c %t.o > %t-specify.txt ; RUN: llvm-objdump -D %t.o > %t-detect.txt diff --git a/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test index 7de64a6edfe2e..dd9eaaeabd8c3 100644 --- a/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test +++ b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test @@ -196,24 +196,6 @@ # RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90C # RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90C -DFLAG_VALUE=0x32 -# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX940 -# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX940 -DFLAG_VALUE=0x40 - -# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX940 -# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX940 -DFLAG_VALUE=0x40 - -# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX940 -# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX940 -DFLAG_VALUE=0x40 - -# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX941 -# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX941 -DFLAG_VALUE=0x4B - -# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX941 -# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX941 -DFLAG_VALUE=0x4B - -# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX941 -# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX941 -DFLAG_VALUE=0x4B - # RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX942 # RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX942 -DFLAG_VALUE=0x4C